author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-09-05 21:49:46 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-09-05 21:49:46 +0900 |
commit | 74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch) | |
tree | 3f991636c1e9423d38eb16a384c20b569b0d678e /runtime/onert | |
parent | 042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff) | |
Imported Upstream version 1.9.0 (tags: upstream/1.9.0, submit/tizen/20200905.125700, accepted/tizen/unified/20200906.032650)
Diffstat (limited to 'runtime/onert')
274 files changed, 5864 insertions, 11867 deletions
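Before the raw diff, a quick orientation on the user-visible API changes in this import: nnfw.h gains a new NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE error (returned by nnfw_run when an output buffer turns out to be too small), and nnfw_experimental.h gains name-based I/O index lookup via nnfw_input_tensorindex / nnfw_output_tensorindex. A minimal caller sketch against the new API follows; the tensor name "input0", the buffer sizes, and the helper name are illustrative assumptions, not part of the commit:

```cpp
#include <cstdio>
#include <vector>
#include "nnfw.h"
#include "nnfw_experimental.h"

// Hypothetical helper: resolve an input by name (new in 1.9.0), run once, and
// surface the new insufficient-output-buffer status. Assumes `session` was
// already created, loaded, and prepared by the caller.
NNFW_STATUS run_by_name(nnfw_session *session)
{
  uint32_t in_index = 0;
  // New experimental API: name -> I/O index. On failure `in_index` is unchanged.
  if (nnfw_input_tensorindex(session, "input0", &in_index) != NNFW_STATUS_NO_ERROR)
    return NNFW_STATUS_ERROR; // no input tensor named "input0"

  std::vector<float> in(128, 0.0f), out(128, 0.0f); // illustrative sizes
  nnfw_set_input(session, in_index, NNFW_TYPE_TENSOR_FLOAT32, in.data(),
                 in.size() * sizeof(float));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, out.data(),
                  out.size() * sizeof(float));

  NNFW_STATUS status = nnfw_run(session);
  if (status == NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE) // new in 1.9.0
    std::fprintf(stderr, "output buffer too small; re-query tensorinfo and retry\n");
  return status;
}
```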
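The nnfw_version.h hunk below bumps NNFW_VERSION from 0x01000800 to 0x01000900. Per the packing comment kept in that header (0xMMmmmmPP: MM = major, mmmm = minor, PP = patch), that is exactly the 1.8.0 to 1.9.0 step; a tiny decode sketch:

```cpp
#include <cstdint>
#include <cstdio>

// Decode the 0xMMmmmmPP packing from nnfw_version.h.
int main()
{
  const uint32_t v = 0x01000900; // NNFW_VERSION after this commit
  std::printf("%u.%u.%u\n", v >> 24, (v >> 8) & 0xFFFFu, v & 0xFFu);
  // prints: 1.9.0
  return 0;
}
```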
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h index ef3678b0d..9348df6ae 100644 --- a/runtime/onert/api/include/nnfw.h +++ b/runtime/onert/api/include/nnfw.h @@ -103,6 +103,8 @@ typedef enum { NNFW_STATUS_INVALID_STATE = 3, /** When it is out of memory */ NNFW_STATUS_OUT_OF_MEMORY = 4, + /** When the given output buffer is not large enough */ + NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5, } NNFW_STATUS; /**
diff --git a/runtime/onert/api/include/nnfw_experimental.h b/runtime/onert/api/include/nnfw_experimental.h index 4cd5c585a..94f781988 100644 --- a/runtime/onert/api/include/nnfw_experimental.h +++ b/runtime/onert/api/include/nnfw_experimental.h @@ -62,4 +62,38 @@ typedef struct NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id, custom_kernel_registration_info *info); +/** + * @brief Get the input tensor index by name + * + * This function finds an input tensor of the given name. + * If found, the index value is set to the address that @c index points to, and returns + * @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and returns @c NNFW_STATUS_ERROR . + * + * @note If two or more input tensors have the same name, the one with the lowest index is always + * returned. + * + * @param[in] session the session object + * @param[in] tensorname the name of the tensor to find, a null-terminated char pointer string + * @param[out] index the index to be set + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index); + +/** + * @brief Get the output tensor index by name + * + * This function finds an output tensor of the given name. + * If found, the index value is set to the address that @c index points to, and returns + * @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and returns @c NNFW_STATUS_ERROR . + * + * @note If two or more output tensors have the same name, the one with the lowest index is always + * returned. + * + * @param[in] session the session object + * @param[in] tensorname the name of the tensor to find, a null-terminated char pointer string + * @param[out] index the index to be set + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index); + #endif // __NNFW_EXPERIMENTAL_H__
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 320271a26..42e43760b 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01000800 +#define NNFW_VERSION 0x01000900 #endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc index d65158fd8..ff5e679da 100644 --- a/runtime/onert/api/src/nnfw_api.cc +++ b/runtime/onert/api/src/nnfw_api.cc @@ -33,6 +33,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4); +STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE, 5); STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0); STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1); @@ -347,3 +348,15 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, NNFW_RETURN_ERROR_IF_NULL(session); return session->load_circle_from_buffer(buffer, size); } + +NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->input_tensorindex(tensorname, index); +} + +NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->output_tensorindex(tensorname, index); +}
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc index eb0b743d3..81b40703f 100644 --- a/runtime/onert/api/src/nnfw_api_internal.cc +++ b/runtime/onert/api/src/nnfw_api_internal.cc @@ -18,6 +18,7 @@ #include "CustomKernelRegistry.h" #include "compiler/Compiler.h" #include "util/ConfigSource.h" +#include "util/Exceptions.h" #include "exec/Execution.h" #include "circle_loader.h" #include "tflite_loader.h" @@ -37,6 +38,7 @@ #define MAX_BACKEND_NAME_LENGTH 32 #define MAX_OP_NAME_LENGTH 64 #define MAX_PATH_LENGTH 1024 +#define MAX_TENSOR_NAME_LENGTH 64 // Is null-terminating in length ? static bool null_terminating(const char *str, uint32_t length) @@ -64,6 +66,32 @@ static onert::ir::Layout convertLayout(NNFW_LAYOUT layout) return onert::ir::Layout::UNKNOWN; } +NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname, + uint32_t *index, bool is_input) +{ + if (!tensorname || !index) + return NNFW_STATUS_UNEXPECTED_NULL; + + if (!null_terminating(tensorname, MAX_TENSOR_NAME_LENGTH)) + { + std::cerr << "tensor name is too long" << std::endl; + return NNFW_STATUS_ERROR; + } + + auto ind_found = is_input ? graph.getInputIndex(tensorname) : graph.getOutputIndex(tensorname); + + if (ind_found.undefined()) + { + // Not found + return NNFW_STATUS_ERROR; + } + else + { + *index = ind_found.value(); + return NNFW_STATUS_NO_ERROR; + } +} + nnfw_session::nnfw_session() : _subgraphs{nullptr}, _execution{nullptr}, _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()} @@ -213,6 +241,12 @@ NNFW_STATUS nnfw_session::run() { _execution->execute(); } + catch (const onert::InsufficientBufferSizeException &e) + { + // Currently insufficient buffer always means output buffer. + std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl; + return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE; + } catch (const std::exception &e) { std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl; @@ -447,26 +481,27 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti) } } + auto ind = primary_subgraph()->getInputs().at(index); + auto &input = primary_subgraph()->operands().at(ind); + + onert::ir::Shape new_shape(ti.rank); + for (int32_t i = 0; i < ti.rank; i++) + new_shape.dim(i) = ti.dims[i]; + + // if the passed shape is the same as the model's shape, do nothing + if (input.info().shape() == new_shape) + return NNFW_STATUS_NO_ERROR; + if (!isStatePreparedOrFinishedRun()) { // In this case, if we apply input shape in primary_subgraph, it will propagate after // compilation and execution - auto ind = primary_subgraph()->getInputs().at(index); - auto &input = primary_subgraph()->operands().at(ind); - - onert::ir::Shape new_shape(ti.rank); - for (int32_t i = 0; i < ti.rank; i++) - new_shape.dim(i) = ti.dims[i]; // overwrite input shape with the shape from ti input.info().shape(new_shape); } else // when called after nnfw_session::prepare() { - onert::ir::Shape new_shape(ti.rank); - for (int32_t i = 0; i < ti.rank; i++) - new_shape.dim(i) = ti.dims[i]; - _execution->changeInputShape(onert::ir::IOIndex(index), new_shape); } @@ -840,3 +875,13 @@ bool nnfw_session::isStatePreparedOrFinishedRun() { return isStatePrepared() || isStateFinishedRun(); } + +NNFW_STATUS nnfw_session::input_tensorindex(const char *tensorname, uint32_t *index) +{ + return getTensorIndexImpl(*primary_subgraph(), tensorname, index, true); +} + +NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *index) +{ + return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false); +}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h index 1c3c3706f..604ba38b4 100644 --- a/runtime/onert/api/src/nnfw_api_internal.h +++ b/runtime/onert/api/src/nnfw_api_internal.h @@ -122,8 +122,6 @@ public: NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti); NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti); - NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); - NNFW_STATUS set_available_backends(const char *backends); NNFW_STATUS set_op_backend(const char *op, const char *backend); @@ -133,9 +131,16 @@ public: NNFW_STATUS set_config(const char *key, const char *value); NNFW_STATUS get_config(const char *key, char *value, size_t value_size); - NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size); + // + // Experimental API + // + + NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); + NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index); + NNFW_STATUS output_tensorindex(const char *tensorname,
uint32_t *index); + private: onert::ir::Graph *primary_subgraph(); bool isStateInitialized(); diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h index 8aaf516cd..5c5041378 100644 --- a/runtime/onert/backend/acl_cl/Backend.h +++ b/runtime/onert/backend/acl_cl/Backend.h @@ -25,6 +25,7 @@ #include "KernelGenerator.h" #include "TensorManager.h" #include "Optimizer.h" +#include "AclTensorRegistry.h" namespace onert { @@ -47,10 +48,13 @@ public: const auto &operands = graph.operands(); const auto &operations = graph.operations(); auto context = std::make_unique<BackendContext>(this, &graph); - auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor)); + auto tm = createTensorManager(is_linear_executor); + auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); + auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index d7f5f8031..31f1c10eb 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -24,78 +24,17 @@ namespace acl_cl { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : acl_common::AclConstantInitializer{operands, tensor_reg} { // DO NOTHING } -void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerCopyInitializer(input_index, input_obj); -} - -void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerPermuteInitializer(input_index, input_obj); -} - -void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); - const auto &block_size_obj = _operands.at(block_size_index); - - if (block_size_obj.isConstant()) - { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - 
}); - }; - } -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); - copyInputInitialize(node, ir::operation::Conv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); - copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); -} - void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node) { copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS); } -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); - copyInputInitialize(node, ir::operation::FullyConnected::BIAS); -} - void ConstantInitializer::visit(const ir::operation::Gather &node) { copyInputInitialize(node, ir::operation::Gather::INDICES); @@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node) copyInputInitialize(node, ir::operation::HashtableLookup::KEYS); } -void ConstantInitializer::visit(const ir::operation::LSTM &node) -{ - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); -} - -void ConstantInitializer::visit(const ir::operation::RNN &node) -{ - copyInputInitialize(node, ir::operation::RNN::WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::BIAS); -} - void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) { const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); @@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) } } -void ConstantInitializer::visit(const ir::operation::TransposeConv &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerPermuteInitializer(kernel_index, kernel_obj); -} - } // namespace acl_cl } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h index c51f72b11..4f894fd31 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.h +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h @@ -17,9 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ 
#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> -#include "TensorBuilder.h" +#include "AclConstantInitializer.h" namespace onert { @@ -28,32 +26,18 @@ namespace backend namespace acl_cl { -class ConstantInitializer : public IConstantInitializer +class ConstantInitializer : public acl_common::AclConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<ITensorRegistry> &tensor_reg); public: - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::EmbeddingLookup &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::HashtableLookup &) override; - void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::TransposeConv &) override; - -private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } - void copyInputInitialize(const ir::Operation &node, uint32_t index); - void permuteInputInitialize(const ir::Operation &node, uint32_t index); - -private: - std::shared_ptr<TensorBuilder> _tensor_builder; + using acl_common::AclConstantInitializer::visit; + void visit(const ir::operation::EmbeddingLookup &) final; + void visit(const ir::operation::Gather &) final; + void visit(const ir::operation::HashtableLookup &) final; + void visit(const ir::operation::SpaceToBatchND &) final; }; } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc index a84f983b4..94489253d 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.cc +++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc @@ -40,15 +40,16 @@ namespace backend namespace acl_cl { -using ::onert::backend::acl_common::asAclClFunction; +using ::onert::backend::acl_common::asAclFunction; using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder< - ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>; + ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>; -KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx, - const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder) +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = 
_tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); assert(_ctx.at(block_size_index).data()); - auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>(); + auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Cast &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + const auto activation = node.param().activation; - std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_tensor->data_type() == ofm_tensor->data_type()) - { - auto l = std::make_unique<::arm_compute::CLCopy>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + const auto act_info = acl_common::asActivationLayerInfo(activation); - fn = std::move(l); - } - else + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().arithmetic_type) { - auto l = std::make_unique<::arm_compute::CLCast>(); - - // TODO Support converting float to int32 as round down - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); - - fn = std::move(l); + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE, act_info); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE, act_info); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN, + act_info); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info); + break; + } + default: + assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations"); + break; } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Conv2D &node) @@ -145,22 +164,20 @@ void 
KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); - auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); + auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info, + ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) @@ -185,50 +202,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); { - auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, multiplier, act_info); + auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>( + ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), + conv_info, multiplier, act_info); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } } -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - 
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -250,70 +240,44 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_tensor = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get(); std::vector<::arm_compute::ICLTensor *> input_tensors; for (auto &ifm_ind : input_indexes) - input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle()); std::unique_ptr<::arm_compute::IFunction> fn; if (input_indexes.size() < 2) { - auto l = std::make_unique<::arm_compute::CLCopy>(); - l->configure(input_tensors.at(0), output_tensor->handle()); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0), + output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::CLConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_tensor->handle(), fixed_axis); - fn = std::move(l); + fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>( + input_tensors, output_tensor->handle(), fixed_axis); } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::FullyConnected &node) { const auto output_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->at(output_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); const auto activation = node.param().activation; - auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor, + auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor, ::arm_compute::CLFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale 
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Reduce &node) { const auto output_index{node.getOutputs().at(0)}; @@ -322,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto keep_dims{node.param().keep_dims}; const auto reduce_type = node.param().reduce_type; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); @@ -334,29 +298,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) std::unique_ptr<arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) { - auto l = std::make_unique<::arm_compute::CLReduceMean>(); - const auto acl_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); - l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes, + keep_dims, output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::CLReduceOperation>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout); - l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims, - acl_common::convertReduceType(reduce_type)); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLReduceOperation>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type)); } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Reshape &node) @@ -364,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
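The acl_cl KernelGenerator hunks above and below all apply the same two mechanical refactors: tensor lookups move from _tensor_builder->at(index) to the new _tensor_reg->getAclTensor(index) registry, and the repeated make_unique-then-configure boilerplate collapses into a single acl_common::generateLayer call. A self-contained sketch of what such a variadic factory looks like; the real helper in acl_common may differ (for instance, it also needs an overload taking ACL's internal memory manager for layers like CLSoftmaxLayer):

```cpp
#include <memory>
#include <utility>

// Stand-ins so the sketch compiles on its own; in onert these are
// arm_compute::IFunction and concrete layers such as arm_compute::CLCopy.
struct IFunction
{
  virtual ~IFunction() = default;
};

// Sketch of the generateLayer pattern: default-construct the layer, forward
// every argument to its configure(), and hand back the type-erased function.
template <typename Layer, typename... Args>
std::unique_ptr<IFunction> generateLayer(Args &&... args)
{
  auto l = std::make_unique<Layer>();
  l->configure(std::forward<Args>(args)...);
  return l;
}

struct DemoLayer : IFunction
{
  void configure(int /*param*/) {} // mimics an ACL layer's configure()
};

int main()
{
  // One expression replaces the old make_unique + configure + move dance.
  auto fn = generateLayer<DemoLayer>(42);
  (void)fn;
  return 0;
}
```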
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) UNUSED_RELEASE(frontend_layout); UNUSED_RELEASE(backend_layout); - auto fn = std::make_unique<::arm_compute::CLReshapeLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(), + output_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Squeeze &node) @@ -398,32 +351,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) (void)dims; (void)ndim; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - auto fn = std::make_unique<arm_compute::CLReshapeLayer>(); - fn->configure(input_tensor->handle(), output_tensor->handle()); - auto acl_fn = asAclClFunction(std::move(fn)); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Tanh &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<arm_compute::CLActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); + auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(), + output_tensor->handle()); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Softmax &node) @@ -433,17 +365,14 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto beta = node.param().beta; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(input_tensor->handle(), output_tensor->handle(), beta); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + output_tensor->handle(), beta); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Slice &node) @@ -453,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto outputData_tensor = _tensor_builder->at(output_index).get(); - auto inputData_tensor = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto 
inputData_tensor = _tensor_reg->getAclTensor(input_index).get(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = inputData_tensor->layout(); @@ -506,13 +435,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) ends_set.set(i, ends[i]); } - auto fn = std::make_unique<::arm_compute::CLSlice>(); - - fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLSlice>( + inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::StridedSlice &node) @@ -523,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto outputData_tensor = _tensor_builder->at(output_index).get(); - auto inputData_tensor = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = inputData_tensor->layout(); @@ -597,14 +523,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) strides_set.set(i, strides[i]); } - auto fn = std::make_unique<::arm_compute::CLStridedSlice>(); - - fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, - strides_set, begin_mask, end_mask, shrink_axis_mask); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>( + inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set, + begin_mask, end_mask, shrink_axis_mask); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Transpose &node) @@ -615,8 +538,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) const auto rank = _ctx.at(ifm_idx).shape().rank(); - auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); - auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = ifm_tensor->layout(); @@ -625,93 +548,168 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector( rank, pv, frontend_layout, backend_layout); - auto fn = std::make_unique<::arm_compute::CLPermute>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Add &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = 
_tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), - arm_compute::ConvertPolicy::SATURATE); + auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), backend_pv); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Sub &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; - - const auto activation = node.param().activation; + const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>(); + const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo( + node.param().op_type, node.param().alpha, node.param().beta); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), - arm_compute::ConvertPolicy::SATURATE); + auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), act_info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Div &node) +void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + { + fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(), + arm_compute::BinaryLogicalOperation::AND); + break; + } + case 
ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: + { + fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: + { + fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN: + { + fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + default: + { + std::string err_msg("acl_cl KernelGenerator : " + node.name() + + " is not an elementwise-binary operation"); + assert(false && err_msg.c_str()); + break; + } + } - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); }
-void KernelGenerator::visit(const ir::operation::Exp &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto fn = std::make_unique<::arm_compute::CLExpLayer>(); + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseUnary::Type::ABS: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - fn->configure(input_tensor->handle(), output_tensor->handle()); + fn = acl_common::generateLayer<arm_compute::CLActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + case ir::operation::ElementwiseUnary::Type::CAST: + { + if (input_tensor->data_type() == output_tensor->data_type()) + { + fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(), + output_tensor->handle()); + } + else + { + // TODO Support converting float to int32 as round down + fn = acl_common::generateLayer<arm_compute::CLCast>( + input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + } + break; + } + case ir::operation::ElementwiseUnary::Type::DEQUANTIZE: + { + fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::EXP: + { + fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::FLOOR: + { + fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT: + { + fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::NEG: + { + fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::RSQRT: + { + fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::SQRT: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; + fn = acl_common::generateLayer<arm_compute::CLActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + default: + { + throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet"); + break; + } + } + + auto acl_fn = asAclFunction(std::move(fn)); _return_fn = std::move(acl_fn); }
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLReshapeLayer>(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - fn->configure(input_tensor->handle(), output_tensor->handle()); + auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(), + output_tensor->handle()); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); }
void KernelGenerator::visit(const ir::operation::InstanceNorm &node) @@ -740,67 +735,25 @@ const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto gamma_tensor = _tensor_builder->at(gamma_index).get(); - auto beta_tensor = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get(); + auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; - auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), - beta_tensor->handle(), epsilon); + auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(), + epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::Logistic &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ -
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalAnd &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), - ::arm_compute::BinaryLogicalOperation::AND); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::LSTM &node) { - _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor, - ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder); + _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor, + ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg); } void KernelGenerator::visit(const ir::operation::Comparison &node) @@ -811,18 +764,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto comparison_type = node.param().comparison_type; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLComparison>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), - (arm_compute::ComparisonOperation)comparison_type); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get(); + auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLComparison>( + input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), + (arm_compute::ComparisonOperation)comparison_type); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Pack &node) @@ -836,26 +786,24 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : node.getInputs()) input_indexes.emplace_back(input_index); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); std::vector<arm_compute::ICLTensor *> inputs; for (const auto &input_index : input_indexes) - inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout(); if (axis < 0) axis 
+= output_rank; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLStackLayer>(); - // Disable applied dim_correction std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes; for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape()); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) @@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) } } - fn->configure(inputs, axis, output); + auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output); // Revert disabling applied dim_correction assert(inputs.size() == orig_inputs_acl_tensor_shapes.size()); @@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node) inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i)); } - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Pool2D &node) +{ + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( + node, _ctx, _tensor_reg, _current_op_seq_layout, + acl_common::convertPoolType(node.param().op_type)); + + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + const auto activation = node.param().activation; + _return_fn = std::make_unique<exec::FunctionSequence>( + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Permute &node) @@ -883,8 +845,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node) const auto ofm_idx{node.getOutputs().at(0)}; const auto ifm_idx{node.getInputs().at(0)}; const auto permute_type = node.getPermuteType(); - auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); - auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get(); const auto rank = _ctx.at(ofm_idx).shape().rank(); assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); @@ -895,70 +857,23 @@ void KernelGenerator::visit(const ir::operation::Permute &node) // WHCN -> CWHN pv = arm_compute::PermutationVector{2, 0, 1}; - auto l = std::make_unique<::arm_compute::CLPermute>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), pv); } else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4) { // CWHN -> WHCN pv = arm_compute::PermutationVector{1, 2, 0}; - auto l = std::make_unique<::arm_compute::CLPermute>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); - - fn = std::move(l); + fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), pv); } else { - auto l = std::make_unique<::arm_compute::CLCopy>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), 
ofm_tensor->handle()); } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::RSQRT &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<arm_compute::CLActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) @@ -967,58 +882,32 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::CLScale>(); + auto fn = acl_common::generateLayer<arm_compute::CLScale>( + ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR, + ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f), + ::arm_compute::SamplingPolicy::TOP_LEFT); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), - ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, - ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU1 &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::ReLU6 &node) +void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = 
_tensor_builder->at(ifm_index).get(); - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; + const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)}; - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); + auto fn = acl_common::generateLayer<arm_compute::CLScale>( + ifm_tensor->handle(), ofm_tensor->handle(), + ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE, + ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::RNN &node) @@ -1036,43 +925,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node) const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - auto weights_tensor = _tensor_builder->at(weights_index).get(); - auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); - auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); + auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get(); + auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); + auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get(); auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - auto copy_layer = std::make_unique<::arm_compute::CLCopy>(); - copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); - _return_fn = asAclClFunction(std::move(copy_layer)); + auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>( + hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); + _return_fn = asAclFunction(std::move(copy_layer)); - auto fn = std::make_unique<::arm_compute::CLRNNLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_tensor->handle(), weights_tensor->handle(), - recurrent_weights_tensor->handle(), bias_tensor->handle(), - hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::Floor &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLFloor>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = 
std::move(acl_fn); + auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(), + hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) @@ -1083,24 +954,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); - auto paddings_tensor = _tensor_builder->at(paddings_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); + auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get(); assert(_ctx.at(block_size_index).data()); assert(_ctx.at(paddings_index).data()); - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>(); - l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), - ofm_tensor->handle()); - fn = std::move(l); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), + ofm_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) @@ -1110,29 +976,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) auto block_size = node.param().block_size; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); -void KernelGenerator::visit(const ir::operation::L2Pool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); + auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), block_size); - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) @@ -1141,17 +991,14 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) const auto 
lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); - - auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>(); - - fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>( + values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::L2Normalization &node) @@ -1173,19 +1020,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::HashtableLookup &node) @@ -1197,21 +1041,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto hits_tensor = _tensor_builder->at(hits_index).get(); - - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto keys_tensor = _tensor_builder->at(keys_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); - - auto fn = std::make_unique<::arm_compute::CLHashtableLookup>(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get(); - fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), - output_tensor->handle(), hits_tensor->handle()); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>( + lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); 
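// Illustrative aside, not lines from the upstream patch: every visitor in this
// file is migrated with the same two substitutions seen above. Tensor lookups
// move from the builder to the new ACL tensor registry:
//   auto t = _tensor_builder->at(index).get();          // before
//   auto t = _tensor_reg->getAclTensor(index).get();    // after
// and, because AclClFunction is deleted from AclFunction.h later in this patch,
// the wrapper handed back to the executor is renamed:
//   _return_fn = asAclClFunction(std::move(fn));        // before
//   _return_fn = asAclFunction(std::move(fn));          // after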
- _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::PReLU &node) @@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto alpha_tensor = _tensor_builder->at(alpha_index).get(); - - auto fn = std::make_unique<::arm_compute::CLPReluLayer>(); - - fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>( + ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TransposeConv &node) @@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); - auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), - tconv_info, invalid_horizontal, invalid_vertical); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::SQRT &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseOr>(); - - fn->configure(input0_tensor->handle(), 
input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseNot>(); + auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal, + invalid_vertical); - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SquaredDifference &node) @@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); + auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TopKV2 &node) @@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node) const auto k = node.param().k; - auto values_tensor = _tensor_builder->at(outputValues_index).get(); - auto indices_tensor = _tensor_builder->at(outputIndices_index).get(); - auto input_tensor = _tensor_builder->at(inputData_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get(); - auto fn = std::make_unique<::arm_compute::CLTopKV2>(); + auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>( + input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Gather &node) @@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto axis_value = (axis_raw < 0 ? 
(ifm_rank + axis_raw) : axis_raw); const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto indices_tensor = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get(); // NOTE The frontend layout and backend layout must be the same for this operation. // If not the same, we have to add a stage(?) to perform permutation of output tensor. It @@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); - auto fn = std::make_unique<::arm_compute::CLGatherEx>(); - // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; assert(n == ifm_tensor->num_dimensions()); @@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node) acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); + auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>( + ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // Revert disabling applied dim_correction ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape); indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLNeg>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ArgMax &node) @@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert((ifm_shape.rank() - 1) == ofm_shape.rank()); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto frontend_layout = _current_op_seq_layout; auto 
backend_layout = ifm_tensor->layout(); @@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>(); + auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>( + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), + ::arm_compute::ReductionOperation::ARG_IDX_MAX); - fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthToSpace &node) @@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Split &node) @@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto 
ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); std::vector<arm_compute::ICLTensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = ifm_tensor->layout(); @@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) axis += ifm_rank; axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLSplit>(); - - fn->configure(ifm_tensor->handle(), output_tensors, axis); + auto fn = + acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Unpack &node) @@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : node.getOutputs()) output_indexes.emplace_back(output_index); - auto input = _tensor_builder->at(input_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); std::vector<arm_compute::ICLTensor *> outputs; for (const auto &output_index : output_indexes) - outputs.emplace_back(_tensor_builder->at(output_index)->handle()); + outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout(); if (axis < 0) axis += input_rank; axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value(); @@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_tensor = _tensor_builder->at(output_index); + const auto &output_tensor = _tensor_reg->getAclTensor(output_index); orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); assert(output_rank == output_tensor->num_dimensions()); if (output_rank != output_tensor->info()->num_dimensions()) @@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) } } - auto fn = std::make_unique<::arm_compute::CLUnstack>(); - - fn->configure(input, outputs, axis); + auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Pad &node) @@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()); const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info); - auto input = _tensor_builder->at(input_index).get()->handle(); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = 
_tensor_reg->getAclTensor(input_index).get()->layout(); ::arm_compute::PaddingList padding_list; padding_list.resize(rank); @@ -1685,11 +1389,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node) acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; } - auto fn = std::make_unique<::arm_compute::CLPadLayer>(); // Disable applied dim_correction size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -1698,50 +1401,13 @@ _ctx.at(input_index).shape(), frontend_layout, backend_layout, false)); } - fn->configure(input, output, padding_list, pixel_value); + auto fn = + acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value); // Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4-dimension tensors // It would produce a mismatch of results - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMin>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMax>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node) @@ -1749,17 +1415,13 @@ const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) @@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index 1e3b06489..d188d6d83 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -21,6 +21,8 @@ #include "ir/Operands.h" #include "TensorBuilder.h" +#include "AclTensorRegistry.h" +#include "TensorManager.h" namespace onert { @@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Mul &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Cast &) override; - void visit(const ir::operation::Div &) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const 
ir::operation::Logistic &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::LogicalAnd &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; - void visit(const ir::operation::RSQRT &) override; - void visit(const ir::operation::ReLU &) override; void visit(const ir::operation::ResizeBilinear &) override; - void visit(const ir::operation::ReLU1 &) override; - void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::ResizeNearestNeighbor &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Floor &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::L2Pool2D &) override; void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SQRT &) override; - void visit(const ir::operation::LogicalOr &) override; - void visit(const ir::operation::LogicalNot &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::TopKV2 &) override; void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Neg &) override; - void visit(const ir::operation::Abs &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Dequantize &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Max &) override; void visit(const ir::operation::ConvertFp32ToFp16 &) override; void visit(const ir::operation::ConvertFp16ToFp32 &) override; @@ -104,6 +88,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; ir::Layout _current_op_seq_layout; }; diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc index 6ba3143e8..9134d3fb8 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.cc +++ b/runtime/onert/backend/acl_cl/Optimizer.cc @@ -19,7 +19,7 @@ #include "ParentInfo.h" #include <cassert> -#include <ir/LoweredGraph.h> +#include <compiler/LoweredGraph.h> #include <util/logging.h> #include "AclSubTensorAnalyzer.h" diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h index bdbd0364e..ab295dbec 100644 --- a/runtime/onert/backend/acl_cl/TensorManager.h +++ b/runtime/onert/backend/acl_cl/TensorManager.h @@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager< using TensorManager = acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>; -TensorManager *createTensorManager(bool is_linear_executor) +inline TensorManager *createTensorManager(bool 
is_linear_executor) { if (is_linear_executor) { diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc new file mode 100644 index 000000000..6ad5b7b69 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AclConstantInitializer.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : IConstantInitializer{operands}, _tensor_reg{tensor_reg} +{ + // DO NOTHING +} + +void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerCopyInitializer(input_index, input_obj); +} + +void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerPermuteInitializer(input_index, input_obj); +} + +void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { + assert(model_obj.data()); + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } +} + +void AclConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); + copyInputInitialize(node, ir::operation::Conv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); + copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); + copyInputInitialize(node, ir::operation::FullyConnected::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::LSTM &node) +{ + copyInputInitialize(node, 
ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::RNN &node) +{ + copyInputInitialize(node, ir::operation::RNN::WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); +} + +} // namespace acl_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h new file mode 100644 index 000000000..52f4c54cf --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ +#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "AclTensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +class AclConstantInitializer : public IConstantInitializer +{ +public: + AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg); + +public: + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::TransposeConv &) override; + +protected: + void copyInputInitialize(const ir::Operation &node, uint32_t index); + void permuteInputInitialize(const ir::Operation &node, uint32_t index); + +private: + std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; } + +protected: + std::shared_ptr<ITensorRegistry> _tensor_reg; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h index 85b18e847..94b65863a 100644 --- a/runtime/onert/backend/acl_common/AclFunction.h +++ b/runtime/onert/backend/acl_common/AclFunction.h @@ -47,12 +47,6 @@ private: std::unique_ptr<::arm_compute::IFunction> _func; }; -class AclClFunction : public AclFunction -{ -public: - using AclFunction::AclFunction; -}; - } // namespace acl_common } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h index 9f7ce3764..372ce689e 100644 --- a/runtime/onert/backend/acl_common/AclKernelGen.h +++ b/runtime/onert/backend/acl_common/AclKernelGen.h @@ -30,11 +30,32 @@ namespace backend namespace acl_common { +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args) +{ + auto l = std::make_unique<Layer>(); + + l->configure(std::forward<Args>(args)...); + + return l; +} + +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> +generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args) +{ + auto l = std::make_unique<Layer>(memory_manager); + + l->configure(std::forward<Args>(args)...); + + return l; +} + template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> -std::unique_ptr<exec::IFunction> -kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder) + typename T_TensorRegistry> +std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, + const ir::Operands &operands, + const std::shared_ptr<T_TensorRegistry> &tensor_reg) { // TODO Support dynamic rnn // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
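As a minimal sketch of the generateLayer<> helper introduced above: it default-constructs a layer, forwards every argument to configure(), and returns the layer as an owning function pointer, which is what lets the visitors in KernelGenerator.cc collapse their make_unique/configure pairs into one call. The stand-in types below (IFunction, DummyCopyLayer) are assumptions made so the example is self-contained; the real helper works on arm_compute::IFunction and the ACL layer classes.

#include <iostream>
#include <memory>
#include <utility>

// Stand-in for arm_compute::IFunction (assumption for this sketch).
struct IFunction
{
  virtual ~IFunction() = default;
  virtual void run() = 0;
};

// Stand-in for an ACL layer exposing the usual configure() entry point.
struct DummyCopyLayer : public IFunction
{
  void configure(int src, int dst) { _src = src; _dst = dst; }
  void run() override { std::cout << "copy " << _src << " -> " << _dst << std::endl; }
  int _src = 0;
  int _dst = 0;
};

// Same shape as the helper added to AclKernelGen.h: construct the layer,
// configure it with perfectly-forwarded arguments, return an owning pointer.
template <typename Layer, typename... Args>
std::unique_ptr<IFunction> generateLayer(Args &&... args)
{
  auto l = std::make_unique<Layer>();
  l->configure(std::forward<Args>(args)...);
  return l;
}

int main()
{
  // One call replaces the old make_unique + configure boilerplate.
  auto fn = generateLayer<DummyCopyLayer>(1, 2);
  fn->run();
  return 0;
}

The second overload in the patch has the same shape but passes a shared arm_compute::IMemoryManager to the layer constructor first, for layers such as CLRNNLayer and CLTransposeConvLayer that need an internal buffer manager.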
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, const auto projection_clip = projection_threshold; assert(cell_clip >= 0.f && projection_clip >= 0.f); - auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get(); - auto output_tensor = tensor_builder->at(output_index).get(); + auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get(); + auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get(); + auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); - auto input_tensor = tensor_builder->at(input_index).get(); + auto input_tensor = tensor_reg->getAclTensor(input_index).get(); - auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get(); + auto input_to_forget_weights_tensor = + tensor_reg->getAclTensor(input_to_forget_weights_index).get(); + auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get(); + auto input_to_output_weights_tensor = + tensor_reg->getAclTensor(input_to_output_weights_index).get(); auto recurrent_to_forget_weights_tensor = - tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_tensor = + tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get(); auto recurrent_to_output_weights_tensor = - tensor_builder->at(recurrent_to_output_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_output_weights_index).get(); - auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get(); + auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get(); + auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get(); + auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get(); + auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get(); + auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get(); - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<T_ACLLayer>(); + auto act_info = asActivationLayerInfo(activation); ::arm_compute::LSTMParams<T_Tensor> lstm_params{}; if (has_cifg_param) { auto input_to_input_weights_tensor = - tensor_builder->at(input_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional auto recurrent_to_input_weights_tensor = - tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); 
// optional auto cell_to_input_weights_handle = - has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle() + has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle() : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional + auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(), recurrent_to_input_weights_tensor->handle(), cell_to_input_weights_handle, input_gate_bias_tensor->handle()); @@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, if (has_peephole_param) { auto cell_to_forget_weights_tensor = - tensor_builder->at(cell_to_forget_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional auto cell_to_output_weights_tensor = - tensor_builder->at(cell_to_output_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(), cell_to_output_weights_tensor->handle()); } if (has_projection_param) { - auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional + auto projection_weights_tensor = + tensor_reg->getAclTensor(projection_weights_index).get(); // optional + auto projection_bias_handle = + has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle() + : nullptr; // optional lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle); } - fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(), - input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), - recurrent_to_forget_weights_tensor->handle(), - recurrent_to_cell_weights_tensor->handle(), - recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), - cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), - output_state_in_tensor->handle(), cell_state_in_tensor->handle(), - scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), - cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, - cell_clip, projection_clip); + auto fn = generateLayer<T_ACLLayer>( + input_tensor->handle(), input_to_forget_weights_tensor->handle(), + input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), + recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(), + recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), + cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), + output_state_in_tensor->handle(), cell_state_in_tensor->handle(), + scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), + cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip, + projection_clip); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> + typename T_TensorBuilder, typename T_TensorRegistry> std::unique_ptr<exec::IFunction> kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands 
&operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout) + const std::shared_ptr<T_TensorBuilder> &tensor_builder, + const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout) { using ir::operation::FullyConnected; @@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope reshape.dim(1) = input_size; /* W */ } - auto output_tensor = tensor_builder->at(output_index).get(); - const auto input_tensor = tensor_builder->at(input_index).get(); - const auto weight_tensor = tensor_builder->at(weight_index).get(); - const auto bias_tensor = tensor_builder->at(bias_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); + const auto input_tensor = tensor_reg->getAclTensor(input_index).get(); + const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get(); + const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get(); const auto frontend_layout = layout; const auto acl_layout = output_tensor->handle()->info()->data_layout(); - auto fn = - std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL; if (operands.at(weight_index).isConstant()) { @@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope assert(operands.at(weight_index).data()); } - fn->configure( - input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(), - output_tensor->handle(), needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); + auto fn = generateLayer<T_ACLLayer>( + tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape, + asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } -template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder> +template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry> std::unique_ptr<::arm_compute::IFunction> kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout, + const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout, ::arm_compute::PoolingType pooling_type) { const auto ofm_index{node.getOutputs().at(0)}; @@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl; VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl; - auto ofm_tensor = tensor_builder->at(ofm_index).get(); - auto ifm_tensor = tensor_builder->at(ifm_index).get(); + auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get(); ::arm_compute::PoolingLayerInfo info{ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<T_ACLLayer>(); + asPadStrideInfo(padding, stride), true /* exclude_padding */}; - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info); + auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info); return fn; } diff --git 
a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index 6b03fdf7f..91452014b 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -25,6 +25,7 @@ #include "ir/OperandIndexMap.h" #include <ir/Operands.h> #include "AclTensorManager.h" +#include "AclTensorRegistry.h" #include <memory> #include "ParentInfo.h" #include <util/Utils.h> @@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -63,19 +65,13 @@ public: void notifyLastUse(const ir::OperandIndex &) override; bool isRegistered(const ir::OperandIndex &) const override; - std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; } void prepare(void) override; void allocate() override; void postFunctionPrepare() override; - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; - std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind); - T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } void setUsesCount(const ir::OperandIndex &index, size_t num_uses) @@ -100,8 +96,6 @@ public: */ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); - bool supportDynamicTensor() override { return false; } - private: void buildTensors(void); ir::OperandIndex findRootParent(ir::OperandIndex index); @@ -113,6 +107,7 @@ private: ir::OperandIndexMap<size_t> _uses_count_map; std::unique_ptr<T_AclTensorManager> _tensor_mgr; + std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg; // for linear executor std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; @@ -140,9 +135,10 @@ namespace acl_common { template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands, - T_AclTensorManager *tensor_mgr) - : _operands{operands}, _tensor_mgr{tensor_mgr} +AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder( + const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg) + : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} { assert(_tensor_mgr); } @@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi } template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_mgr->at(ind); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn) -{ - _tensor_mgr->iterate(fn); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<T_ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind) -{ - auto ret = 
_tensor_mgr->at(ind); - assert(ret != nullptr); - return ret; -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> std::unique_ptr<ITensorManager> AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void) { diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h new file mode 100644 index 000000000..1ef9f4b35 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ + +#include "backend/ITensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +/** + * @brief Tensor registry class for acl backends + * + * This is implemented as a wrapper of AclTensorManager. + */ +template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry +{ +public: + AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} + + std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override + { + return _tensor_mgr->at(ind); + } + + std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override + { + return getITensor(ind); + } + + auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); } + +private: + T_AclTensorManager *_tensor_mgr; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index a5bbe1691..67dcc8192 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -18,6 +18,7 @@ #include "Swizzle.h" #include "ir/DataType.h" +#include "ir/operation/ElementwiseActivation.h" #include <memory> namespace @@ -177,6 +178,50 @@ namespace acl_common } } +::arm_compute::ActivationLayerInfo +asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha, + float beta) +{ + switch (op_type) + { + case ir::operation::ElementwiseActivation::Type::RELU: + if (beta == 0.f) + { + if (alpha == ir::operation::ElementwiseActivation::infinity) + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha}; + } + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta}; + } + case ir::operation::ElementwiseActivation::Type::TANH: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta}; + case 
ir::operation::ElementwiseActivation::Type::LOGISTIC: + // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0. + // TODO In ACL and the nnapi spec, currently, Logistic's L is always 1, k is always 1, and x0 is + // always 0 (always sigmoid), regardless of the parameter values. + // If ACL supports a non-sigmoid logistic, these param values should be fixed. + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; + default: + throw std::runtime_error{"Not supported, yet"}; + break; + } +} + arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank, ir::Layout frontend_layout, ir::Layout backend_layout) { @@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct return std::make_unique<AclFunction>(std::move(layer)); } -std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer) -{ - return std::make_unique<AclClFunction>(std::move(layer)); -} - ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout) { switch (data_layout) @@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) } } +arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir) +{ + switch (pool_type_ir) + { + case ir::operation::Pool2D::PoolType::AVG: + return arm_compute::PoolingType::AVG; + case ir::operation::Pool2D::PoolType::L2: + return arm_compute::PoolingType::L2; + case ir::operation::Pool2D::PoolType::MAX: + return arm_compute::PoolingType::MAX; + default: + throw std::runtime_error("convertPoolType: Not supported operation yet"); + } +} + arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir) { switch (reduce_type_ir) diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h index 9362098a5..380321c07 100644 --- a/runtime/onert/backend/acl_common/Convert.h +++ b/runtime/onert/backend/acl_common/Convert.h @@ -25,7 +25,9 @@ #include "ir/Layout.h" #include "ir/InternalType.h" #include "ir/Operand.h" +#include "ir/operation/Pool2D.h" #include "ir/operation/Reduce.h" +#include "ir/operation/ElementwiseActivation.h" #include "ir/Shape.h" #include "ir/TypeInfo.h" #include "ir/Coordinates.h" @@ -59,6 +61,9 @@ namespace acl_common const ir::Stride &stride); ::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code); +::arm_compute::ActivationLayerInfo +asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha, + float beta); arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank, ir::Layout frontend_layout, ir::Layout backend_layout); @@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr ir::Layout backend_layout); std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer); -std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer); template <typename T_Function> std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn) @@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type); +arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir); arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir); } // namespace acl_common diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index a0b145e19..35d6e4e8e 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -48,10 +48,13 @@ public: const auto &operands = graph.operands(); const auto &operations = graph.operations(); auto context = std::make_unique<BackendContext>(this, &graph); - auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor)); + auto tm = createTensorManager(is_linear_executor); + auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); + auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc index 4191b277f..79edb9ded 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc @@ -24,100 +24,12 @@ namespace acl_neon { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : acl_common::AclConstantInitializer{operands, tensor_reg} { // DO NOTHING } -void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerCopyInitializer(input_index, input_obj); -} - -void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerPermuteInitializer(input_index, input_obj); -} - -void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); - const auto &block_size_obj = _operands.at(block_size_index); - - if (block_size_obj.isConstant()) - { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - 
tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; - } -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); - copyInputInitialize(node, ir::operation::Conv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); - copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); - copyInputInitialize(node, ir::operation::FullyConnected::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::LSTM &node) -{ - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); -} - -void ConstantInitializer::visit(const ir::operation::RNN &node) -{ - copyInputInitialize(node, ir::operation::RNN::WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::BIAS); -} - void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) { const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); @@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) } } -void ConstantInitializer::visit(const ir::operation::TransposeConv &node) -{ - permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); -} - } // namespace acl_neon } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index 6b4c1f145..c7d71cdcf 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -17,9 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> -#include "TensorBuilder.h" +#include "AclConstantInitializer.h" namespace onert { @@ -28,29 +26,15 @@ namespace backend namespace acl_neon { -class ConstantInitializer : public IConstantInitializer +class ConstantInitializer : public acl_common::AclConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const 
std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<ITensorRegistry> &tensor_reg); public: - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::TransposeConv &) override; - -private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } - void copyInputInitialize(const ir::Operation &node, uint32_t index); - void permuteInputInitialize(const ir::Operation &node, uint32_t index); - -private: - std::shared_ptr<TensorBuilder> _tensor_builder; + using acl_common::AclConstantInitializer::visit; + void visit(const ir::operation::SpaceToBatchND &node) final; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index 1195b83cc..6d53c1245 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction; using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder< ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>; -KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx, - const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder) +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); auto frontend_layout = _current_op_seq_layout; auto backend_layout = ifm_tensor->layout(); @@ -111,14 +92,11 @@ void 
KernelGenerator::visit(const ir::operation::ArgMax &node) const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>(); - - fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), + arm_compute::ReductionOperation::ARG_IDX_MAX); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) @@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); assert(_ctx.at(block_size_index).data()); - auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>(); - - fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); + auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Cast &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_tensor->data_type() == ofm_tensor->data_type()) - { - auto l = std::make_unique<::arm_compute::NECopy>(); + const auto activation = node.param().activation; - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - fn = std::move(l); - } - else + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().arithmetic_type) { - auto l = std::make_unique<::arm_compute::NECast>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); - - fn = std::move(l); + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>( + lhs_tensor->handle(), 
rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + // For scale 1.0, the only allowed RoundingPolicy is RoundingPolicy::TO_ZERO + fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); + break; + } + default: + assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations"); + break; } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = std::make_unique<exec::FunctionSequence>( + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Conv2D &node) @@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); - auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); + auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info, + ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>(); + auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) @@ -255,67 +245,23 @@ void 
KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); { - auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, multiplier, act_info); + auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>( + ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), + conv_info, multiplier, act_info); _return_fn = asAclFunction(std::move(fn)); } } -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_tensor = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get(); std::vector<::arm_compute::ITensor *> input_tensors; for (const auto &ifm_ind : input_indexes) - input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle()); 
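// NOTE The acl_common::generateLayer<T> helper used throughout this change is defined
// outside this diff. Judging only from the call sites here (an assumption, not the
// actual implementation), it is a small forwarding factory that folds the former
// make_unique + configure pairs into one call. A minimal sketch under that assumption:

#include <memory>  // std::make_unique
#include <utility> // std::forward

template <typename T_Layer, typename... Ts>
std::unique_ptr<T_Layer> generateLayer(Ts &&... args)
{
  // Construct the ACL layer, configure it with the forwarded arguments,
  // and hand ownership back to the caller.
  auto layer = std::make_unique<T_Layer>();
  layer->configure(std::forward<Ts>(args)...);
  return layer;
}

// Call sites such as NEConvolutionLayer above also pass a memory manager first,
// so an overload that forwards an extra constructor argument is presumably provided.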
std::unique_ptr<::arm_compute::IFunction> fn; if (input_indexes.size() < 2) { - auto l = std::make_unique<::arm_compute::NECopy>(); - l->configure(input_tensors.at(0), output_tensor->handle()); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0), + output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::NEConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_tensor->handle(), fixed_axis); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>( + input_tensors, output_tensor->handle(), fixed_axis); } - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { - const auto output_index{node.getOutputs().at(0)}; - const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; - const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; + + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo( + node.param().op_type, node.param().alpha, node.param().beta); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); + std::unique_ptr<arm_compute::IFunction> fn; + if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC) + { + // NOTE NEActivationLayer can produce erroneous results. This is caused by + // 'vexpq_f32()'. + // The neon function returns a value outside of the representable range of float as 'NaN' + // instead of 'INF', and the result of this op then contains errors due to the 'NaN'.
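    // To make the NOTE above concrete (illustrative numbers, not from the source):
    // the logistic op computes sigmoid(x) = 1 / (1 + exp(-x)). For x = -100.0f,
    // exp(-x) = exp(100.0f) overflows float. A scalar expf() returns +INF, and
    // 1.0f / (1.0f + INF) still saturates to the correct 0.0f; but when the
    // vectorized vexpq_f32() path yields NaN instead of INF, 1.0f / (1.0f + NaN)
    // stays NaN and poisons the output. Hence NEActivationLayerEx is selected
    // for LOGISTIC below.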
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), act_info); + } + else + { + fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(), + ofm_tensor->handle(), act_info); + } - auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>(); + _return_fn = asAclFunction(std::move(fn)); +} - fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); +void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto acl_fn = asAclFunction(std::move(fn)); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - _return_fn = std::move(acl_fn); + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + { + fn = acl_common::generateLayer<arm_compute::NELogicalAnd>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: + { + fn = acl_common::generateLayer<arm_compute::NELogicalOr>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + default: + { + std::string err_msg("acl_neon KernelGenerator : " + node.name() + + " is not an elementwise-binary operation"); + assert(false && err_msg.c_str()); + break; + } + } + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Floor &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; + + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); + + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseUnary::Type::ABS: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + fn = acl_common::generateLayer<arm_compute::NEActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + case ir::operation::ElementwiseUnary::Type::CAST: + { + if (input_tensor->data_type() == output_tensor->data_type()) + { + fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(), + 
output_tensor->handle()); + } + else + { + fn = acl_common::generateLayer<arm_compute::NECast>( + input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + } + break; + } + case ir::operation::ElementwiseUnary::Type::DEQUANTIZE: + { + fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::EXP: + { + fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::FLOOR: + { + fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT: + { + fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::NEG: + { + fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::RSQRT: + { + fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::SQRT: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - auto fn = std::make_unique<::arm_compute::NEFloor>(); + fn = acl_common::generateLayer<arm_compute::NEActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + default: + { + throw std::runtime_error("acl_neon KernelGenerator : " + node.name() + + " is not supported yet"); + break; + } + } + _return_fn = asAclFunction(std::move(fn)); +} - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); +void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto acl_fn = asAclFunction(std::move(fn)); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - _return_fn = std::move(acl_fn); + auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>( + values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); + + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::FullyConnected &node) { const auto output_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->at(output_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); const auto activation = node.param().activation; auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto 
keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto hits_tensor = _tensor_builder->at(hits_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get(); - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto keys_tensor = _tensor_builder->at(keys_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - auto fn = std::make_unique<::arm_compute::NEHashtableLookup>(); + auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>( + lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); - fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), - output_tensor->handle(), hits_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Gather &node) @@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // Converting in reverse order const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto indices_tensor = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get(); const auto backend_layout = ofm_tensor->layout(); UNUSED_RELEASE(backend_layout); @@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); - auto fn = std::make_unique<::arm_compute::NEGatherEx>(); - // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; assert(n == ifm_tensor->num_dimensions()); @@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node) acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); + auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>( + ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would // use arm_compute::TensorInfo::offset_element_in_bytes() // It would create an error when the kernel accesses high dimension that its value is 1 - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::InstanceNorm &node) @@ -513,17 +596,16 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node) const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto 
beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto gamma_tensor = _tensor_builder->at(gamma_index).get(); - auto beta_tensor = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get(); + auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; - auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), - beta_tensor->handle(), epsilon); + auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(), + epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); @@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::L2Pool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = 
asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalAnd &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalAnd>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEBitwiseNot>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalOr>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Logistic &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'. - // The neon function returns a value outside of the limit of representation in float as 'NaN' - // instead of 'INF', and then the result of this op will be errors due to the 'NaN'. 
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LSTM &node) { _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor, - ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>(); - - // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale - arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); - - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::NENegLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg); } void KernelGenerator::visit(const ir::operation::Pack &node) @@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : node.getInputs()) input_indexes.emplace_back(input_index); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); std::vector<arm_compute::ITensor *> inputs; for (const auto &input_index : input_indexes) - inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout(); if (axis < 0) axis += output_rank; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEStackLayer>(); - // Disable applied dim_correction for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) } } - 
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
  for (const auto &input_index : node.getInputs())
    input_indexes.emplace_back(input_index);

-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
  std::vector<arm_compute::ITensor *> inputs;
  for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());

  const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();

  if (axis < 0)
    axis += output_rank;
  axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();

-  auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
  // Disable applied dim_correction
  for (const auto &input_index : input_indexes)
  {
    size_t input_rank = _ctx.at(input_index).shape().rank();
-    const auto &input_tensor = _tensor_builder->at(input_index);
+    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
    assert(input_rank == input_tensor->num_dimensions());
    if (input_rank != input_tensor->info()->num_dimensions())
    {
@@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
    }
  }

-  fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);

  // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would
  // use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
  auto rank = _ctx.at(input_index).shape().rank();
  auto pad_base = _ctx.at(pad_index).data()->base();

-  auto input = _tensor_builder->at(input_index).get()->handle();
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();

  ::arm_compute::PaddingList padding_list;
  padding_list.resize(rank);
@@ -793,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);

    const auto frontend_layout = _current_op_seq_layout;
-    const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+    const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
    const auto axis =
        acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
    padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
  const auto pixel_value =
      ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());

-  auto fn = std::make_unique<::arm_compute::NEPadLayer>();
-  fn->configure(input, output, padding_list, pixel_value);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);

  _return_fn = asAclFunction(std::move(fn));
}

+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
void KernelGenerator::visit(const ir::operation::Permute &node)
{
  const auto ofm_idx{node.getOutputs().at(0)};
  const auto ifm_idx{node.getInputs().at(0)};
  const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
  const auto rank = _ctx.at(ofm_idx).shape().rank();
  assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
    // WHCN -> CWHN
    pv = arm_compute::PermutationVector{2, 0, 1};

-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
  }
  else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
  {
    // CWHN -> WHCN
    pv = arm_compute::PermutationVector{1, 2, 0};

-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
  }
  else
  {
-    auto l = std::make_unique<::arm_compute::NECopy>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
  }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}
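Pack above, and Split and Unpack later in this file, convert the frontend axis with acl_common::ToARMComputeAxis before configuring a layer, and the Permute comments ("WHCN -> CWHN") show the same idea: ACL orders tensor dimensions fastest-first, the reverse of the frontend's NHWC/NCHW shape order. A simplified sketch of the reversal, assuming frontend and backend layouts agree; the real helper additionally compensates for NHWC/NCHW mismatches, so this is an illustration, not the library's code:

    #include <cstdint>

    // Simplified stand-in for acl_common::ToARMComputeAxis: with matching
    // layouts, frontend axis i of a rank-r tensor is ACL dimension r - 1 - i.
    inline uint32_t toAclAxisSimplified(uint32_t rank, uint32_t frontend_axis)
    {
      return rank - 1 - frontend_axis; // e.g. rank 4, axis 0 (N) -> ACL dim 3
    }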
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -867,21 +812,14 @@
  const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
  const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();

-  std::unique_ptr<::arm_compute::IFunction> fn;
-
-  auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
-  l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
-  fn = std::move(l);
+  auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+      ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -890,8 +828,8 @@
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

  // Convert to ACL axes taking into account negative values and possible duplicates.
  const auto &axes = _ctx.at(axes_index);
@@ -906,93 +844,21 @@
  std::unique_ptr<::arm_compute::IFunction> fn;
  if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
-    auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+                                                              keep_dims, output_tensor->handle());
  }
  else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
  {
-    auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+                                                             keep_dims, output_tensor->handle());
  }
  else
  {
-    auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
-                 acl_common::convertReduceType(reduce_type));
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+        input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+        acl_common::convertReduceType(reduce_type));
  }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

  // NOTE This operation must not be changed the layout from frontend to backend
  // So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
  UNUSED_RELEASE(frontend_layout);
  UNUSED_RELEASE(backend_layout);

-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@
  const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEScale>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();

-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+  auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@
  const auto activation = node.param().activation;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();

-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();

  auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);

-  auto copy_layer = std::make_unique<::arm_compute::NECopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
  _return_fn = asAclFunction(std::move(copy_layer));

-  auto fn = std::make_unique<::arm_compute::NERNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
  _return_fn = asAclFunction(std::move(fn));
}

@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
  (void)dims;
  (void)ndim;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
  const auto beta = node.param().beta;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@
    acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
  }

-  auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@
      node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
  const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();

  assert(_ctx.at(block_size_index).data());
  assert(_ctx.at(paddings_index).data());

-  auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-                ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@
  auto block_size = node.param().block_size;

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();

-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@
  for (const auto &output : node.getOutputs())
    output_indexes.emplace_back(output);

-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
  std::vector<arm_compute::ITensor *> output_tensors;
  for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@
    axis += ifm_rank;
  axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();

-  auto fn = std::make_unique<::arm_compute::NESplit>();
-
-  fn->configure(ifm_tensor->handle(), output_tensors, axis);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);

  _return_fn = asAclFunction(std::move(fn));
}

-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();

-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+      lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@
    ends_set.set(i, ends[i]);
  }

-  auto fn = std::make_unique<::arm_compute::NESlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@
    strides_set.set(i, strides[i]);
  }

-  auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);

-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@
    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
  }

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();

  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);

-  auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+      ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+      invalid_horizontal, invalid_vertical);

-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
-                tconv_info, invalid_horizontal, invalid_vertical);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@
  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto &perm{node.param().perm};

-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@
      rank, pv, frontend_layout, backend_layout);

  std::unique_ptr<::arm_compute::IFunction> fn;
-
  if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
  {
-    auto l = std::make_unique<::arm_compute::NETranspose>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+                                                             ofm_tensor->handle());
  }
  else
  {
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), backend_pv);
  }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@
  for (const auto &output_index : node.getOutputs())
    output_indexes.emplace_back(output_index);

-  auto input = _tensor_builder->at(input_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
  std::vector<arm_compute::ITensor *> outputs;
  for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());

  const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
  if (axis < 0)
    axis += input_rank;
  axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();

-  auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
  // Disable applied dim_correction
  std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
  for (const auto &output_index : output_indexes)
  {
    size_t output_rank = _ctx.at(output_index).shape().rank();
-    const auto &output_tensor = _tensor_builder->at(output_index);
+    const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
    orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
    assert(output_rank == output_tensor->num_dimensions());
    if (output_rank != output_tensor->info()->num_dimensions())
    {
@@ -1577,84 +1325,23 @@
    }
  }

-  fn->configure(input, outputs, axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);

  _return_fn = asAclFunction(std::move(fn));
}

-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

-  auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());

-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@
  const auto comparison_type = node.param().comparison_type;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();

-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@
  const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
  const auto axis = node.param().axis;

-  auto output_tensor = _tensor_builder->at(out_idx).get();
-  auto indices_tensor = _tensor_builder->at(indices_idx).get();
-  auto depth_tensor = _tensor_builder->at(depth_idx).get();
-  auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
-  auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
-  auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
-  fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
-                offvalue_tensor->handle(), output_tensor->handle(), axis);
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+  auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+  auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+  auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+      indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+      offvalue_tensor->handle(), output_tensor->handle(), axis);
+  _return_fn = asAclFunction(std::move(fn));
}

} // namespace acl_neon
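Every _tensor_builder->at(index) lookup in the file above became _tensor_reg->getAclTensor(index): kernel generation now resolves tensors through a registry, and the builder is left responsible for allocation only. A rough, self-contained outline of that lookup-only role; the real class is acl_common::AclTensorRegistry, templated over the tensor manager and implementing ITensorRegistry, so the names and members here are assumptions:

    // Hypothetical outline of the lookup-only registry used above.
    template <typename TensorManager>
    class TensorRegistrySketch
    {
    public:
      explicit TensorRegistrySketch(TensorManager *mgr) : _mgr{mgr} {}

      // What the visitors call: resolve an operand index to a backend tensor.
      template <typename Index> auto getAclTensor(const Index &ind) { return _mgr->at(ind); }

    private:
      TensorManager *_mgr; // the manager owns and allocates; the registry only resolves
    };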
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"

namespace onert
{
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
{
public:
  KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
-                  const std::shared_ptr<TensorBuilder> &tensor_builder);
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);

  void visit(const ir::OpSequence &) override;
-  void visit(const ir::operation::Abs &) override;
  void visit(const ir::operation::ArgMax &) override;
  void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
  void visit(const ir::operation::Conv2D &) override;
  void visit(const ir::operation::DepthToSpace &) override;
  void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Dequantize &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
  void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::Floor &) override;
  void visit(const ir::operation::FullyConnected &) override;
  void visit(const ir::operation::Gather &) override;
  void visit(const ir::operation::HashtableLookup &) override;
  void visit(const ir::operation::InstanceNorm &) override;
  void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::L2Pool2D &) override;
  void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::LogicalAnd &) override;
-  void visit(const ir::operation::LogicalNot &) override;
-  void visit(const ir::operation::LogicalOr &) override;
-  void visit(const ir::operation::Logistic &) override;
  void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::Mul &) override;
-  void visit(const ir::operation::Neg &) override;
  void visit(const ir::operation::Pack &) override;
  void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Pool2D &) override;
  void visit(const ir::operation::Permute &) override;
  void visit(const ir::operation::PReLU &) override;
  void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::ReLU &) override;
-  void visit(const ir::operation::ReLU1 &) override;
-  void visit(const ir::operation::ReLU6 &) override;
  void visit(const ir::operation::Reshape &) override;
  void visit(const ir::operation::ResizeBilinear &) override;
  void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::RSQRT &) override;
  void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Tanh &) override;
  void visit(const ir::operation::Softmax &) override;
  void visit(const ir::operation::SpaceToBatchND &) override;
  void visit(const ir::operation::SpaceToDepth &) override;
  void visit(const ir::operation::Split &) override;
-  void visit(const ir::operation::SQRT &) override;
  void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::Sub &) override;
  void visit(const ir::operation::Slice &) override;
  void visit(const ir::operation::StridedSlice &) override;
  void visit(const ir::operation::TransposeConv &) override;
  void visit(const ir::operation::Transpose &) override;
  void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Div &) override;
-  void visit(const ir::operation::Exp &) override;
  void visit(const ir::operation::ExpandDims &) override;
  void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Max &) override;
  void visit(const ir::operation::OneHot &) override;

private:
  const ir::Operands &_ctx;
  const ir::Operations &_operations_ctx;
  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
  ir::Layout _current_op_seq_layout;
};

diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
 #include "ParentInfo.h"

 #include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
 #include <util/logging.h>
 #include "AclSubTensorAnalyzer.h"

diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
                                                   operand::NESubTensor>;

-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
  if (is_linear_executor)
  {

diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
    const auto &operands = graph.operands();
    const auto &operations = graph.operations();
    auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>();
+    auto tr = std::make_shared<cpu_common::TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
    context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
                                                            context->external_context());
    context->tensor_register = nullptr;
    context->optimizer = nullptr;
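One small but load-bearing change sits in the TensorManager.h hunk above: createTensorManager gained inline because it is a function defined in a header. A brief illustration of why (general C++ ODR behavior, not code from this patch):

    // TensorManager.h is included from more than one translation unit.
    // Without 'inline', each inclusion emits an external definition and
    // the link step fails:
    //
    //   // a.cc                        // b.cc
    //   #include "TensorManager.h"     #include "TensorManager.h"
    //
    //   -> multiple definition of 'createTensorManager(bool)'
    //
    // 'inline' permits identical definitions in every translation unit,
    // which is exactly what the one-definition rule requires here.

The cpu/Backend.h hunk shows the same registry theme as the acl_neon changes: a single cpu_common::TensorRegistry is created and the identical instance is handed to the TensorBuilder, ConstantInitializer, and KernelGenerator, so all three agree on the operand-index-to-tensor mapping.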
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
{
public:
  BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
                 std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
                 std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
                 std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
                 std::shared_ptr<ITensorRegister> tensor_register = nullptr,
                 std::shared_ptr<IOptimizer> optimizer = nullptr)
-      : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
-                                       kernel_gen, tensor_register, optimizer),
+      : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+                                       constant_initializer, kernel_gen, tensor_register,
+                                       optimizer),
        _external_context(new ExternalContext)
  {
  }

diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
{

ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
  // DO NOTHING
}

diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__

-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"

 #include <backend/IConstantInitializer.h>
 #include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
{
public:
  ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);

public:
  void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
  void visit(const ir::operation::FullyConnected &) override;

private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }

private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
};

} // namespace cpu

diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
 #include "KernelGenerator.h"

-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
 #include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
 #include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
 #include "ops/CompareLayer.h"
 #include "ops/ConcatLayer.h"
 #include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
 #include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
 #include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
 #include "ops/ExpandDimsLayer.h"
 #include "ops/FillLayer.h"
 #include "ops/FullyConnectedLayer.h"
 #include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
 #include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
 #include "ops/OneHotLayer.h"
 #include "ops/OperationUtils.h"
 #include "ops/PackLayer.h"
 #include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
 #include "ops/PowLayer.h"
 #include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
 #include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
 #include "ops/ReshapeLayer.h"
 #include "ops/ResizeBilinearLayer.h"
 #include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
 #include "ops/SelectLayer.h"
 #include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
 #include "ops/SliceLayer.h"
 #include "ops/SoftMaxLayer.h"
 #include "ops/StridedSliceLayer.h"
@@ -66,22 +53,16 @@
 #include "ops/SpaceToDepthLayer.h"
 #include "ops/SplitLayer.h"
 #include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
 #include "ops/TileLayer.h"
 #include "ops/TransposeLayer.h"
 #include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
 #include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
 #include "ops/L2NormLayer.h"
 #include "ops/MatrixBandPartLayer.h"
 #include "ops/BatchMatMulLayer.h"
 #include "ops/BroadcastToLayer.h"
 #include "ops/FusedBatchNormLayer.h"
 #include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
 #include "ops/StatelessRandomUniformLayer.h"

 #include <backend/Backend.h>
@@ -102,6 +83,104 @@ namespace cpu
namespace
{

+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+  switch (arithmetic_type_ir)
+  {
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+      return ops::ArithmeticType::kAdd;
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+      return ops::ArithmeticType::kSub;
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+      return ops::ArithmeticType::kMul;
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+      return ops::ArithmeticType::kDiv;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      return ops::ElementwiseActivationType::kLogistic;
+    case ir::operation::ElementwiseActivation::Type::RELU:
+      return ops::ElementwiseActivationType::kReLU;
+    case ir::operation::ElementwiseActivation::Type::TANH:
+      return ops::ElementwiseActivationType::kTanh;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+      return ops::ElementwiseBinaryType::kLogicalOr;
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+      return ops::ElementwiseBinaryType::kMax;
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+      return ops::ElementwiseBinaryType::kMin;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+      return ops::ElementwiseUnaryType::kAbs;
+    case ir::operation::ElementwiseUnary::Type::CAST:
+      return ops::ElementwiseUnaryType::kCast;
+    case ir::operation::ElementwiseUnary::Type::COS:
+      return ops::ElementwiseUnaryType::kCos;
+    case ir::operation::ElementwiseUnary::Type::ERF:
+      return ops::ElementwiseUnaryType::kErf;
+    case ir::operation::ElementwiseUnary::Type::EXP:
+      return ops::ElementwiseUnaryType::kExp;
+    case ir::operation::ElementwiseUnary::Type::LOG:
+      return ops::ElementwiseUnaryType::kLog;
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+      return ops::ElementwiseUnaryType::kLogicalNot;
+    case ir::operation::ElementwiseUnary::Type::NEG:
+      return ops::ElementwiseUnaryType::kNeg;
+    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+      return ops::ElementwiseUnaryType::kQuantize;
+    case ir::operation::ElementwiseUnary::Type::ROUND:
+      return ops::ElementwiseUnaryType::kRound;
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+      return ops::ElementwiseUnaryType::kRSqrt;
+    case ir::operation::ElementwiseUnary::Type::SIN:
+      return ops::ElementwiseUnaryType::kSin;
+    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+      return ops::ElementwiseUnaryType::kZerosLike;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return ops::PoolType::kAvg;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return ops::PoolType::kMax;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
@@ -127,11 +206,12 @@
KernelGenerator::KernelGenerator(
    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
    const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
    const std::shared_ptr<ExternalContext> &external_context)
    : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
-      _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
-      _external_context(external_context)
+      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+      _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
  // DO NOTHING
}
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
  assert(!_return_fn_seq);
  assert(_tensor_builder->dynamicTensorManager());
-  assert(_tensor_builder->tensorRegistry());
+  assert(_tensor_reg);

-  auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
-  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
-      _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);

  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -154,7 +232,7 @@
    dyn_ctx->op_seq = &op_seq;
    dyn_ctx->operations = &_operations_ctx;
    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-    dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+    dyn_ctx->tensor_registry = _tensor_reg;
    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();

    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@
  for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
  {
-    auto portable_tensor = _tensor_builder->portableAt(ind);
+    auto portable_tensor = _tensor_reg->getPortableTensor(ind);
    if (portable_tensor)
    {
      assert(portable_tensor->layout() == ir::Layout::NHWC);
    }
-    auto tensor = _tensor_builder->at(ind);
+    auto tensor = _tensor_reg->getNativeTensor(ind);
    if (tensor)
    {
      tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
-                  stride.horizontal, stride.vertical, activation, ofm_tensor);
+                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+                  activation, ofm_tensor);

    _return_fn = std::move(fn);
    return;
@@ -221,11 +301,12 @@
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
-                activation, ofm_tensor);
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);

  _return_fn = std::move(fn);
}
_tensor_builder->portableAt(bias_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get(); auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(); @@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; - - const auto kh = node.param().kh; - const auto kw = node.param().kw; - - const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::MaxPoolLayer>(); - - fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, - stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; - - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::AvgPoolLayer>(); - - fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, - stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto rank = _ctx.at(ofm_index).shape().rank(); const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); auto fn = std::make_unique<ops::ConcatLayer>(); @@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)}; const auto 
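Every hunk in this file makes the same substitution: _tensor_builder->portableAt(ind) becomes _tensor_reg->getPortableTensor(ind), so tensor lookup lives in a registry shared across components instead of being reached through the builder. A simplified stand-in for such a registry (hypothetical and far smaller than cpu_common::TensorRegistry):

#include <cstdint>
#include <memory>
#include <unordered_map>

template <typename TensorT>
class MiniTensorRegistry
{
public:
  void set(uint32_t ind, std::shared_ptr<TensorT> tensor) { _map[ind] = std::move(tensor); }

  // Mirrors getNativeTensor/getPortableTensor: null when nothing is registered.
  std::shared_ptr<TensorT> get(uint32_t ind) const
  {
    auto it = _map.find(ind);
    return it == _map.end() ? nullptr : it->second;
  }

private:
  std::unordered_map<uint32_t, std::shared_ptr<TensorT>> _map;
};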
block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto input_alloc = _tensor_reg->getPortableTensor(input_index).get(); + auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get(); auto fn = std::make_unique<ops::BatchToSpaceNDLayer>(); @@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) if (node.getInputs().size() != NNApiInputs) { const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)}; - crops_alloc = _tensor_builder->portableAt(crops_data_index).get(); + crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get(); } fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc); @@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node) const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto value_tensor = _tensor_builder->portableAt(value_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto value_tensor = _tensor_reg->getPortableTensor(value_index).get(); auto fn = std::make_unique<ops::FillLayer>(); @@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto weight_tensor = _tensor_builder->portableAt(weight_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get(); auto bias_tensor = - bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get(); + bias_index.undefined() ? 
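BatchToSpaceND above treats its crops input as optional, and FullyConnected below does the same for its bias: an absent or undefined operand index resolves to nullptr, and the kernel's configure() decides what nullptr means. The idea in isolation, with hypothetical toy types rather than onert's:

#include <cstdio>

struct Tensor { float v; };

// configure() receives nullptr when the model omitted the operand.
void configure(const Tensor *input, const Tensor *weights, const Tensor *bias)
{
  if (bias != nullptr)
    std::printf("apply bias %f\n", bias->v);
  else
    std::printf("no bias operand, skip the add\n");
  (void)input; (void)weights;
}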
nullptr : _tensor_reg->getPortableTensor(bias_index).get(); auto fn = std::make_unique<ops::FullyConnectedLayer>(); @@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); // optional 2nd input IPortableTensor *shape_tensor = nullptr; @@ -417,7 +447,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) if (node.getInputs().size() == 2) { const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; - shape_tensor = _tensor_builder->portableAt(shape_index).get(); + shape_tensor = _tensor_reg->getPortableTensor(shape_index).get(); } auto fn = std::make_unique<ops::ReshapeLayer>(); @@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); // Squeeze can share same kernel with reshape auto fn = std::make_unique<ops::ReshapeLayer>(); @@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto beta = node.param().beta; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::SoftMaxLayer>(); @@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Add &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::AddLayer>(); + auto fn = std::make_unique<ops::BinaryArithmeticLayer>(); - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); + fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation, + convertArithmeticType(node.param().arithmetic_type)); _return_fn = std::move(fn); } @@ -484,9 
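The Add visitor above is rewritten as BinaryArithmetic: one visitor and one BinaryArithmeticLayer, with the concrete operation chosen by an arithmetic-type enum at configure time (Sub, Mul and Div disappear the same way further down). A standalone sketch of the dispatch idea, not the actual layer:

#include <functional>
#include <stdexcept>

enum class ArithmeticType { kAdd, kSub, kMul, kDiv };

// Select the scalar kernel once; the element-wise loop stays identical.
std::function<float(float, float)> pickKernel(ArithmeticType t)
{
  switch (t)
  {
    case ArithmeticType::kAdd: return [](float a, float b) { return a + b; };
    case ArithmeticType::kSub: return [](float a, float b) { return a - b; };
    case ArithmeticType::kMul: return [](float a, float b) { return a * b; };
    case ArithmeticType::kDiv: return [](float a, float b) { return a / b; };
  }
  throw std::runtime_error("unknown arithmetic type");
}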
+515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto comparison_type = node.param().comparison_type; @@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); const auto backend_layout = output_tensor->layout(); UNUSED_RELEASE(backend_layout); @@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Sub &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::SubLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MulLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::OneHot &node) { const auto output_index{node.getOutputs().at(0)}; @@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); - auto depth_tensor = _tensor_builder->portableAt(depth_index).get(); - auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get(); - auto offvalue_tensor = 
_tensor_builder->portableAt(offvalue_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); + auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get(); + auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get(); + auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get(); assert(indices_tensor->data_type() == OperandType::INT32); assert(axis <= static_cast<int>(indices_tensor->num_dimensions())); @@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Div &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::DivLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Einsum &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto equation = node.param().equation; @@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) const auto &operand = _ctx.at(idx); // TODO make sure using `_current_op_seq_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); - auto in_tensor = _tensor_builder->portableAt(idx); + auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); } }; @@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Exp &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::ExpLayer>(); + auto fn = std::make_unique<ops::ElementwiseActivationLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta, + convertElementwiseActivationType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ExpandDims &node) +void KernelGenerator::visit(const 
ir::operation::ElementwiseBinary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::ExpandDimsLayer>(); + auto fn = std::make_unique<ops::ElementwiseBinaryLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(lhs_tensor, rhs_tensor, output_tensor, + convertElementwiseBinaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Logistic &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::LogisticLayer>(); + auto fn = std::make_unique<ops::ElementwiseUnaryLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Tanh &node) +void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); - auto fn = std::make_unique<ops::TanhLayer>(); + auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, axis_tensor, output_tensor); _return_fn = std::move(fn); } @@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node) assert(-rank <= axis && axis < rank); - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - 
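Exp and Logistic above stop being their own visitors; together with Tanh, and with Abs, Sin, Cos, RSQRT, Neg, Log, Round, LogicalNot, ZerosLike and Quantize elsewhere in this file, they fold into ElementwiseActivation/ElementwiseUnary layers keyed by an op-type enum. The per-element effect of the unary consolidation, as a self-contained sketch:

#include <cmath>
#include <stdexcept>

enum class ElementwiseUnaryType { kAbs, kExp, kLog, kNeg, kRSqrt };

// One layer, many ops: one switch replaces a dozen near-identical classes.
float applyUnary(ElementwiseUnaryType op, float x)
{
  switch (op)
  {
    case ElementwiseUnaryType::kAbs:   return std::fabs(x);
    case ElementwiseUnaryType::kExp:   return std::exp(x);
    case ElementwiseUnaryType::kLog:   return std::log(x);
    case ElementwiseUnaryType::kNeg:   return -x;
    case ElementwiseUnaryType::kRSqrt: return 1.0f / std::sqrt(x);
  }
  throw std::runtime_error("unknown unary type");
}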
input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); auto fn = std::make_unique<ops::PackLayer>(); @@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) assert(rank == 0 || (-rank <= axis && axis < rank)); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); std::vector<IPortableTensor *> output_tensors; for (auto &output_idx : node.getOutputs()) - output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::UnpackLayer>(); @@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node) const auto output_index{node.getOutputs().at(0)}; assert(_ctx.at(pad_index).data()); - auto input = _tensor_builder->portableAt(input_index).get(); - auto output = _tensor_builder->portableAt(output_index).get(); + auto input = _tensor_reg->getPortableTensor(input_index).get(); + auto output = _tensor_reg->getPortableTensor(output_index).get(); auto pad_rank = _ctx.at(pad_index).shape().dim(0); auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base()); @@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MaxLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MinLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cast &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CastLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Transpose &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto 
input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::TransposeLayer>(); @@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; const auto keep_dims = node.param().keep_dims; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axes_tensor = _tensor_builder->portableAt(axes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get(); if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN) { @@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) } } -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLULayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU6 &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLU6Layer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Select &node) { const auto output_index{node.getOutputs().at(0)}; @@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node) const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto condition_tensor = _tensor_builder->portableAt(condition_index).get(); - auto true_tensor = _tensor_builder->portableAt(true_index).get(); - auto false_tensor = _tensor_builder->portableAt(false_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get(); + auto true_tensor = _tensor_reg->getPortableTensor(true_index).get(); + auto false_tensor = _tensor_reg->getPortableTensor(false_index).get(); auto fn = std::make_unique<ops::SelectLayer>(); @@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto begins_tensor = _tensor_builder->portableAt(begins_index).get(); - auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto begins_tensor = 
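ReLU and ReLU6 above are deleted as dedicated visitors because ElementwiseActivation covers them: both are clamps, and the alpha/beta arguments passed to ElementwiseActivationLayer::configure can carry the bounds. Assuming that reading of the parameters (this hunk does not spell it out), the per-element math is just:

#include <algorithm>
#include <limits>

// ReLU: clamp(x, 0, +inf);  ReLU6: clamp(x, 0, 6).
float clampActivation(float x, float lower, float upper)
{
  return std::min(upper, std::max(lower, x));
}
// e.g. clampActivation(x, 0.0f, 6.0f) reproduces ReLU6, and
//      clampActivation(x, 0.0f, std::numeric_limits<float>::infinity()) plain ReLU.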
_tensor_reg->getPortableTensor(begins_index).get(); + auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get(); auto fn = std::make_unique<ops::SliceLayer>(); @@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto starts_tensor = _tensor_builder->portableAt(starts_index).get(); - auto ends_tensor = _tensor_builder->portableAt(ends_index).get(); - auto strides_tensor = _tensor_builder->portableAt(strides_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get(); + auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get(); + auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get(); auto begin_mask = node.param().begin_mask; auto end_mask = node.param().end_mask; @@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node) const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); auto axis_resolved = axis < 0 ? axis + rank : axis; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitLayer>(); @@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::AbsLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Sin &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::SinLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cos &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CosLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::RSQRT &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto 
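The Split visitor above normalizes a possibly negative axis with axis < 0 ? axis + rank : axis; several other visitors in this file rely on the same convention via ops::getAxis. Isolated:

#include <cassert>

// A negative axis counts from the back: -1 on a rank-4 tensor means axis 3.
int resolveAxis(int axis, int rank)
{
  assert(-rank <= axis && axis < rank);
  return axis < 0 ? axis + rank : axis;
}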
ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::RsqrtLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Shape &node) { const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); auto fn = std::make_unique<ops::ShapeLayer>(); @@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) auto align_corners = node.param().align_corners; auto half_pixel_centers = node.param().half_pixel_centers; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ResizeBilinearLayer>(); @@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); auto fn = std::make_unique<ops::ReverseLayer>(); @@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::NegLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto output_index{node.getOutputs().at(0)}; @@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ArgMinMaxLayer>(); @@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Pow &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; - 
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::PowLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Log &node) +void KernelGenerator::visit(const ir::operation::Pool2D &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::LogLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} + const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; -void KernelGenerator::visit(const ir::operation::Round &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)}; + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto padding = + ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RoundLayer>(); + auto fn = std::make_unique<ops::PoolLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor, + convertPoolType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::LogicalNot &node) +void KernelGenerator::visit(const ir::operation::Pow &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::LogicalNotLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(0)}; - const auto rhs_index{node.getInputs().at(1)}; + const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = 
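The new Pool2D visitor above replaces the deleted MaxPool2D and AvgPool2D ones: the padding, stride and window plumbing was already identical, so only the reduction differs and convertPoolType selects it. A 1-D standalone sketch of that shared skeleton (illustrative only, not PoolLayer itself):

#include <algorithm>
#include <vector>

enum class PoolType { kAvg, kMax };

std::vector<float> pool1D(const std::vector<float> &in, int kw, int stride, PoolType type)
{
  std::vector<float> out;
  for (int start = 0; start + kw <= static_cast<int>(in.size()); start += stride)
  {
    float acc = (type == PoolType::kMax) ? in[start] : 0.0f;
    for (int i = 0; i < kw; ++i)
      acc = (type == PoolType::kMax) ? std::max(acc, in[start + i]) : acc + in[start + i];
    out.push_back(type == PoolType::kAvg ? acc / kw : acc);  // avg divides, max keeps
  }
  return out;
}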
_tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::LogicalOrLayer>(); + auto fn = std::make_unique<ops::PowLayer>(); - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); + fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); _return_fn = std::move(fn); } @@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto input_alloc = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::L2NormLayer>(); @@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ZerosLike &node) +void KernelGenerator::visit(const ir::operation::Range &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)}; + const auto start_index{node.getInputs().at(ir::operation::Range::START)}; + const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; + const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto start_tensor = _tensor_reg->getPortableTensor(start_index).get(); + auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get(); + auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get(); - auto fn = std::make_unique<ops::ZerosLikeLayer>(); + auto fn = std::make_unique<ops::RangeLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Range &node) +void KernelGenerator::visit(const ir::operation::Rank &node) { - const auto output_index{node.getOutputs().at(0)}; - const auto start_index{node.getInputs().at(ir::operation::Range::START)}; - const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; - const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto start_tensor = _tensor_builder->portableAt(start_index).get(); - auto limit_tensor = _tensor_builder->portableAt(limit_index).get(); - auto delta_tensor = _tensor_builder->portableAt(delta_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RangeLayer>(); + auto fn = std::make_unique<ops::RankLayer>(); + + fn->configure(ifm_tensor, ofm_tensor); - fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto 
lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto fn = std::make_unique<ops::SqDiffLayer>(); @@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node) const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)}; const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get(); auto fn = std::make_unique<ops::TileLayer>(); @@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node) const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)}; const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get(); - auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get(); + auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get(); auto fn = std::make_unique<ops::MatrixBandPartLayer>(); @@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node) const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); const auto adj_x = node.param().adj_x; const auto adj_y = node.param().adj_y; @@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node) const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)}; const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto shape_tensor = _tensor_builder->portableAt(shape_index).get(); + auto output_tensor = 
_tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get(); auto fn = std::make_unique<ops::BroadcastToLayer>(); @@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto epsilon = node.param().epsilon; const auto is_training = node.param().is_training; @@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node) const auto beta = node.param().beta; const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::LogSoftMaxLayer>(); @@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)}; const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get(); - auto padding_tensor = _tensor_builder->portableAt(padding_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get(); + auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get(); auto fn = std::make_unique<ops::SpaceToBatchNDLayer>(); @@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Quantize &node) -{ - const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)}; - const auto output_index{node.getOutputs().at(0)}; - - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - - auto fn = std::make_unique<ops::QuantizeLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; const auto output_index{node.getOutputs().at(0)}; auto block_size = node.param().block_size; - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); auto fn = std::make_unique<ops::SpaceToDepthLayer>(); @@ -1462,9 +1233,9 @@ 
void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node) const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)}; const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto shape_alloc = _tensor_builder->portableAt(shape_index).get(); - auto seed_alloc = _tensor_builder->portableAt(seed_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get(); + auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get(); auto fn = std::make_unique<ops::StatelessRandomUniformLayer>(); @@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)}; const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)}; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); - auto in_size_splits = _tensor_builder->portableAt(size_splits).get(); - auto in_split_dim = _tensor_builder->portableAt(split_dim).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); + auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get(); + auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitVLayer>(); diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 40c056a96..786e68ee0 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -19,6 +19,7 @@ #include "ExternalContext.h" #include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" #include "Tensor.h" #include <backend/CustomKernelBuilder.h> @@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); @@ -46,8 +48,6 @@ public: void visit(const ir::OpSequence &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Fill &) override; void visit(const ir::operation::FullyConnected &) override; @@ -55,51 +55,35 @@ public: void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Mul &) override; - void visit(const ir::operation::Div &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; void visit(const ir::operation::Gather &) 
override; void visit(const ir::operation::Custom &node) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Logistic &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Max &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Cast &) override; void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::ReLU &) override; - void visit(const ir::operation::ReLU6 &) override; void visit(const ir::operation::Select &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Abs &) override; - void visit(const ir::operation::Cos &) override; - void visit(const ir::operation::Sin &) override; - void visit(const ir::operation::RSQRT &) override; void visit(const ir::operation::Shape &) override; void visit(const ir::operation::ResizeBilinear &node) override; void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::Neg &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Log &) override; - void visit(const ir::operation::Round &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::LogicalNot &) override; - void visit(const ir::operation::ZerosLike &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::LogicalOr &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; + void visit(const ir::operation::Rank &) override; void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::BatchMatMul &) override; void visit(const ir::operation::BatchToSpaceND &) override; @@ -107,7 +91,6 @@ public: void visit(const ir::operation::FusedBatchNorm &) override; void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::Quantize &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::StatelessRandomUniform &) override; void visit(const ir::operation::SplitV &) override; @@ -116,6 +99,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; ir::Layout _current_op_seq_layout; const std::shared_ptr<ExternalContext> _external_context; diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc index ab8ba5756..828d52f7c 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.cc +++ 
b/runtime/onert/backend/cpu/TensorBuilder.cc @@ -27,8 +27,8 @@ namespace backend namespace cpu { -TensorBuilder::TensorBuilder() - : _tensor_reg{new cpu_common::TensorRegistry()}, +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} { @@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); const auto tensor_info = _tensor_info_map.at(ind); - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { const auto size = tensor_info.total_size(); _static_tensor_mgr->claimPlan(ind, size); @@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { _static_tensor_mgr->releasePlan(ind); } @@ -85,29 +85,6 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. } -std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getITensor(ind); -} - -std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getPortableTensor(ind); -} - -bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) -{ - return _tensor_reg->setMigrantTensor(ind, tensor); -} - -void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } - -std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind) -{ - return _tensor_reg->getNativeTensor(ind); -} - std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) { return std::move(_static_tensor_mgr); diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 617136514..b6d5f09cc 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -38,9 +38,7 @@ namespace cpu class TensorBuilder : public ITensorBuilder { public: - TensorBuilder(); - - bool supportDynamicTensor() override { return true; } + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); /** * @brief Register tensor information to allocate on CPU backend @@ -60,34 +58,12 @@ public: void allocate() override; void postFunctionPrepare() override { /* DO NOTHING */} - /** - * @brief Get tensor with a specific OperandIndex - * - * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise. - */ - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override; - /** - * @brief Get tensor with a specific OperandIndex. - * @param ind OperandIndex for the tensor. There must exist a tensor with this ind. - * If not, program will crash with assert or exception. 
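TensorBuilder above stops constructing its own TensorRegistry and receives one through the constructor; the same shared_ptr is handed to KernelGenerator, so builder, generator and the dynamic-tensor machinery all observe one registry. The injection shape in miniature, with hypothetical minimal types:

#include <memory>
#include <utility>

struct TensorRegistry { /* shared index -> tensor maps */ };

class Builder
{
public:
  explicit Builder(std::shared_ptr<TensorRegistry> reg) : _reg(std::move(reg)) {}

private:
  std::shared_ptr<TensorRegistry> _reg;
};

// usage: auto reg = std::make_shared<TensorRegistry>();
//        Builder builder{reg};  // the kernel generator receives the same reg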
- * @return shared_ptr<Tensor> - */ - std::shared_ptr<Tensor> at(const ir::OperandIndex &ind); - std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind); - bool setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) override; - - std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } - private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc deleted file mode 100644 index 322785aeb..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AbsLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void AbsLayer::absFloat32() -{ - nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; } - -void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void AbsLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - absFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - absQuant8(); - } - else - { - throw std::runtime_error{"Abs: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h deleted file mode 100644 index feb5f35ae..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
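The cpu TensorBuilder now receives its TensorRegistry from outside instead of creating one, so the KernelGenerator (which gains a _tensor_reg member in the hunk above) and the tensor managers all observe the same registry, and the removed lookup helpers (at/portableAt/tensorAt/setMigrantTensor) become direct registry calls. A minimal sketch of the resulting wiring, assuming backend setup code shaped roughly like this (the Backend/BackendContext side is not part of this excerpt):

  auto tensor_reg = std::make_shared<cpu_common::TensorRegistry>();
  auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg); // new constructor shown above
  // kernel generation and shape handling query the shared registry directly:
  auto tensor = tensor_reg->getNativeTensor(ind); // replaces tensor_builder->at(ind)
  if (!tensor->is_dynamic())
  {
    // static planning path, as in notifyFirstUse/notifyLastUse above
  }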
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ - -#include "backend/IPortableTensor.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AbsLayer : public ::onert::exec::IFunction -{ -public: - AbsLayer(); - -public: - void absFloat32(); - - void absQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc deleted file mode 100644 index 379215303..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.cc +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AddLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void AddLayer::addFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AddLayer::addInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), 
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void AddLayer::addQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - // cker quant8 add is not implemented yet - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void AddLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - addFloat32(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { 
- addQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - addInt32(); - } - else - { - throw std::runtime_error{"Add: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h deleted file mode 100644 index 91030d93a..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AddLayer : public ::onert::exec::IFunction -{ -public: - AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void addFloat32(); - - void addQuant8(); - - void addInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc deleted file mode 100644 index 9c22c1c86..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
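For reference, the uint8 path above (addQuant8, carried over into setAddOrSubQuant8Params of the new BinaryArithmeticLayer below) adds asymmetric-quantized tensors by first rescaling both inputs into a common fixed-point domain: values are pre-shifted left by 20 bits, each input scale is normalized against twice the larger input scale (so both normalized scales are at most 0.5), and the inverse of the shift is folded into the output multiplier. A self-contained sketch of just the scale arithmetic (plain C++; QuantizeMultiplier then splits each double into an int32 multiplier plus shift):

  const int left_shift = 20;
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  const double real_lhs_scale = lhs_scale / norm_max_scale;   // <= 0.5
  const double real_rhs_scale = rhs_scale / norm_max_scale;   // <= 0.5
  // the output scale normalizes the final sum, so it is inverted here
  const double real_output_scale = norm_max_scale / (output_scale * (1 << left_shift));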
- */ - -#include "AvgPoolLayer.h" - -#include <cker/operation/AveragePool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define AVGPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -AvgPoolLayer::AvgPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void AvgPoolLayer::averagePoolFloat32() -{ - AVGPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} -void AvgPoolLayer::averagePoolQuant8() -{ - AVGPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - assert(input != nullptr); - assert(output != nullptr); - - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void AvgPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - averagePoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - averagePoolQuant8(); - } - else - { - throw std::runtime_error{"AvgPool: unsupported data type"}; - } -} - -#undef AVGPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h deleted file mode 100644 index d4e8f79e7..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AvgPoolLayer : public ::onert::exec::IFunction -{ -public: - AvgPoolLayer(); - -public: - void averagePoolFloat32(); - - void averagePoolQuant8(); - - void configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc new file mode 100644 index 000000000..f50c63375 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BinaryArithmeticLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ + +template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T> +void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, + nnfw::cker::BinaryArithmeticOpParam op_params) +{ + const bool need_broadcast = + nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params); + if (need_broadcast) + { + nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>( + op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + return; + } + + nnfw::cker::BinaryArithmeticOp<arithmetic_type>( + op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <nnfw::cker::BinaryArithmeticOpType arithmetic_type> +std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> +generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation, + nnfw::cker::BinaryArithmeticOpParam op_params) +{ + switch (lhs->data_type()) + { + case OperandType::FLOAT32: + { + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(activation, &output_activation_min, &output_activation_max); + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, op_params); + break; + } + case OperandType::INT32: + { + int32_t output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(activation, &output_activation_min, &output_activation_max); + op_params.quantized_activation_max = output_activation_max; + op_params.quantized_activation_min = output_activation_min; + return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, op_params); + break; + } + default: + throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"}; + } +} + +void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, ir::Activation activation, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam &op_params = *params; + op_params.quantized_activation_max = output_activation_max; + op_params.quantized_activation_min = output_activation_min; + // Parameters for scaled quantized computation + op_params.left_shift = 20; + // Zero-points of input and output tensors + op_params.input1_offset = -lhs->data_offset(); + op_params.input2_offset = -rhs->data_offset(); + op_params.output_offset = output->data_offset(); + assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); + assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); + assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); + + // Compute normalized scale for _lhs and _rhs values, + // and represent in 
32-bit fixed point + const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale()); + const double real_lhs_scale = lhs->data_scale() / norm_max_scale; + const double real_rhs_scale = rhs->data_scale() / norm_max_scale; + // output scale is used to normalize final result, so we invert the scale here + const double real_output_scale = + norm_max_scale / (output->data_scale() * (1 << op_params.left_shift)); + + // Represent the scales as fixed int32_t multipliers, and int32_t shifts + QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); + QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); + QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); +} + +void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, ir::Activation activation, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam &op_params = *params; + + op_params.quantized_activation_max = output_activation_max; + op_params.quantized_activation_min = output_activation_min; + op_params.input1_offset = -lhs->data_offset(); + op_params.input2_offset = -rhs->data_offset(); + op_params.output_offset = output->data_offset(); + + double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale(); + QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift); +} + +} // namespace + +void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, const ir::Activation activation, + const ArithmeticType arithmetic_type) +{ + assert(lhs != nullptr); + assert(rhs != nullptr); + assert(output != nullptr); + + _lhs = lhs; + _rhs = rhs; + _output = output; + + nnfw::cker::BinaryArithmeticOpParam op_params; + switch (arithmetic_type) + { + case ArithmeticType::kAdd: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params); + _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + op_params); + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation, + op_params); + } + break; + case ArithmeticType::kSub: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params); + op_params.input2_multiplier *= -1; + _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + op_params); + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation, + op_params); + } + break; + case ArithmeticType::kMul: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + nnfw::cker::BinaryArithmeticOpParam op_params; + setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params); + _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + op_params); + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation, + op_params); + } + 
break; + case ArithmeticType::kDiv: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + throw std::runtime_error{ + "BinaryArithmetic(Div): Div operation does not support quantization"}; + } + else if (_lhs->data_type() == OperandType::INT32) + { + throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"}; + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation, + op_params); + } + break; + default: + throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"}; + } +} + +void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h index 9411be76e..d6b33ad07 100644 --- a/runtime/onert/backend/cpu/ops/DivLayer.h +++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__ #include <backend/IPortableTensor.h> #include "OperationUtils.h" @@ -31,21 +31,25 @@ namespace cpu namespace ops { -class DivLayer : public ::onert::exec::IFunction +enum class ArithmeticType +{ + kAdd, + kSub, + kMul, + kDiv, +}; + +class BinaryArithmeticLayer : public ::onert::exec::IFunction { public: - DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) + BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) { // DO NOTHING } public: - void divFloat32(); - - void divQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); + void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, + const ir::Activation activation, const ArithmeticType arithmetic_type); void run() override; @@ -54,7 +58,7 @@ private: const IPortableTensor *_rhs; IPortableTensor *_output; - ir::Activation _activation{ir::Activation::NONE}; + std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -62,4 +66,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc deleted file mode 100644 index 497515606..000000000 --- a/runtime/onert/backend/cpu/ops/CastLayer.cc +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
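The net effect of the BinaryArithmeticLayer above: the four former layers (Add, Sub, Mul, Div) share one class, and all type- and quantization-dependent decisions are made once in configure(), which binds a std::function kernel; run() reduces to a single indirect call, while broadcasting is still resolved per run inside eval() via ProcessBroadcastShapes. A usage sketch based on the interfaces in this hunk (tensor setup omitted):

  BinaryArithmeticLayer layer;
  // was: AddLayer::configure(lhs, rhs, activation, output)
  layer.configure(lhs, rhs, output, ir::Activation::NONE, ArithmeticType::kAdd);
  layer.run(); // calls the bound eval<nnfw::cker::BinaryArithmeticOpType::ADD, float>(...)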
- */ - -#include "CastLayer.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -CastLayer::CastLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out) -{ - auto input_shape = getTensorShape(_input); - auto output_shape = getTensorShape(_output); - const auto num_elements = MatchingFlatSize(input_shape, output_shape); - - std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); }); -} - -template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out) -{ - switch (_output->data_type()) - { - case ir::DataType::FLOAT32: - castTensor(in, out.f); - return; - case ir::DataType::INT32: - castTensor(in, out.i32); - return; - case ir::DataType::UINT32: - castTensor(in, out.u32); - return; - case ir::DataType::UINT8: - castTensor(in, out.u8); - return; - case ir::DataType::BOOL8: - castTensor(in, out.b); - return; - case ir::DataType::INT64: - castTensor(in, out.i64); - return; - default: - throw std::runtime_error("Not supported output type" + - std::to_string((int)_output->data_type())); - } -} - -void CastLayer::run() -{ - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); - const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); - auto out = *reinterpret_cast<DataPtr *>(&output_buf); - - switch (_input->data_type()) - { - case ir::DataType::FLOAT32: - castPtr(in.f, out); - return; - case ir::DataType::INT32: - castPtr(in.i32, out); - return; - case ir::DataType::UINT32: - castPtr(in.u32, out); - return; - case ir::DataType::UINT8: - castPtr(in.u8, out); - return; - case ir::DataType::BOOL8: - castPtr(in.b, out); - return; - case ir::DataType::INT64: - castPtr(in.i64, out); - return; - default: - throw std::runtime_error("Cast: unsupported data type" + - std::to_string((int)_input->data_type())); - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h deleted file mode 100644 index 290c722e2..000000000 --- a/runtime/onert/backend/cpu/ops/CastLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class CastLayer : public ::onert::exec::IFunction -{ -public: - CastLayer(); - -public: - template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out); - template <typename FromT> void castPtr(const FromT *in, DataPtr out); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 2d5bbef1e..c057267d3 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -31,7 +31,8 @@ namespace ops ConvolutionLayer::ConvolutionLayer() : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1), + _dilationHeightFactor(1), _activation(ir::Activation::NONE), _conv_kernel(new nnfw::cker::Conv()), _prepare(false) { // DO NOTHING @@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32() op_params.padding_values.height = _paddingTop; op_params.stride_width = _strideWidth; op_params.stride_height = _strideHeight; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; @@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8() nnfw::cker::ConvParams op_params; op_params.stride_width = _strideWidth; op_params.stride_height = _strideHeight; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; op_params.padding_type = getPaddingType(_paddingType); op_params.padding_values.width = _paddingLeft; op_params.padding_values.height = _paddingTop; @@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, IPortableTensor *output) { _input = input; @@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe _paddingBottom = paddingBottom; _strideWidth = strideWidth; _strideHeight = strideHeight; + _dilationWidthFactor = dilationWidthFactor; + _dilationHeightFactor = dilationHeightFactor; _activation = activation; _output = output; } @@ -145,7 +150,8 @@ void ConvolutionLayer::run() param_padding.param.bottom = _paddingBottom; const auto padding = - 
ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height); + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + _dilationWidthFactor, _dilationHeightFactor); _paddingLeft = padding.left; _paddingRight = padding.right; @@ -176,7 +182,8 @@ void ConvolutionLayer::prepare() { bool is_transposed = false; kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), - getPaddingType(_paddingType), is_transposed); + getPaddingType(_paddingType), is_transposed, _dilationWidthFactor, + _dilationHeightFactor); // Decrease reference of _kernel(weights) only when _kernel is constant if (is_transposed) diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h index 2833387c4..398892e65 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h @@ -56,7 +56,8 @@ public: const IPortableTensor *bias, ir::PaddingType _paddingType, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const ir::Activation activation, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, IPortableTensor *output); void run() override; @@ -77,6 +78,8 @@ private: uint32_t _strideWidth; uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; ir::Activation _activation; diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc deleted file mode 100644 index 9417019d5..000000000 --- a/runtime/onert/backend/cpu/ops/CosLayer.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
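The ConvolutionLayer change above plumbs dilation factors (previously hard-coded to 1) into both padding calculation and kernel preparation. Under dilation d, a filter of k taps spans (k - 1) * d + 1 input pixels, and that effective extent is what SAME padding must cover. A small sketch of the arithmetic (the exact ir::calculatePadding body is not in this excerpt; this is the conventional formula):

  // effective filter extent under dilation
  inline uint32_t effective_filter_size(uint32_t k, uint32_t d) { return (k - 1) * d + 1; }

  // SAME padding along one axis (input size in, stride s):
  //   out = ceil(in / s)
  //   pad = max(0, (out - 1) * s + effective_filter_size(k, d) - in)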
- */ - -#include "CosLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -CosLayer::CosLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CosLayer::cosFloat32() -{ - nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; } - -void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void CosLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - cosFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - cosQuant8(); - } - else - { - throw std::runtime_error{"Cos: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h deleted file mode 100644 index 1fadef718..000000000 --- a/runtime/onert/backend/cpu/ops/CosLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class CosLayer : public ::onert::exec::IFunction -{ -public: - CosLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void cosFloat32(); - void cosQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc deleted file mode 100644 index 556c55e33..000000000 --- a/runtime/onert/backend/cpu/ops/DivLayer.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DivLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void DivLayer::divFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs); - if (requires_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } - else - { - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } -} - -void DivLayer::divQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - // op_params.quantized_activation_max = output_activation_max; - // op_params.quantized_activation_min = output_activation_min; - - // cker quant8 div is not implemented yet - throw std::runtime_error{"Div NYI for quantized"}; -} - -void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void DivLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - divFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - divQuant8(); - } - else - { - throw std::runtime_error{"Div: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc new file mode 100644 index 000000000..c1d63172b --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ElementwiseActivationLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Logistic.h> +#include <cker/operation/ReLU.h> +#include <cker/operation/ReLU6.h> +#include <cker/operation/Tanh.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +ElementwiseActivationLayer::ElementwiseActivationLayer() + : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type) +{ + const auto input_scale = static_cast<double>(_input->data_scale()); + const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); + const auto output_scale = static_cast<double>(_output->data_scale()); + const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + float transformed = 0.f; + if (op_type == ElementwiseActivationType::kTanh) + { + transformed = std::tanh(dequantized); + } + else if (op_type == ElementwiseActivationType::kLogistic) + { + transformed = 1.0f / (1.0f + std::exp(-dequantized)); + } + else + { + throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type"); + } + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); + } +} + +void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input, + IPortableTensor *output) +{ + const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output)); + const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer()); + uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer()); + + for (int i = 0; i < size; ++i) + { + output_data[i] = _table[input_data[i]]; + } +} + +void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output, + float alpha, float beta, + ElementwiseActivationType op_type) +{ + _input = input; + _output = output; + + switch (op_type) + { + case ElementwiseActivationType::kLogistic: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(op_type); + _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this, + std::placeholders::_1, std::placeholders::_2); + } + else if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::Logistic(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; + } + break; + case ElementwiseActivationType::kReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ReLU(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else if (alpha == 6.f && beta == 0.f) + { + _kernel = 
[](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ReLU6(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error( + "ElementwiseActivationLayer : This layer supports only ReLU(0-inf) and ReLU6(0-6)"); + } + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"}; + } + break; + case ElementwiseActivationType::kTanh: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(op_type); + _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this, + std::placeholders::_1, std::placeholders::_2); + } + else if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Tanh): unsupported data type"}; + } + break; + default: + throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); + } +} + +void ElementwiseActivationLayer::run() { _kernel(_input, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 35a184074..3ef580041 100644 --- a/runtime/onert/backend/cpu/ops/TanhLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,26 +30,33 @@ namespace cpu namespace ops { -class TanhLayer : public ::onert::exec::IFunction +enum class ElementwiseActivationType { -public: - TanhLayer(); + kLogistic, + kReLU, + kTanh +}; +class ElementwiseActivationLayer : public ::onert::exec::IFunction +{ public: - void tanhFloat32(); + ElementwiseActivationLayer(); - void tanhQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); +public: + void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta, + const ElementwiseActivationType op_type); void run() override; - void PopulateLookupTable(); + void PopulateLookupTable(const ElementwiseActivationType op_type); + + void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output); private: const IPortableTensor *_input; IPortableTensor *_output; uint8_t _table[256]; + std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel; }; } // namespace ops @@ -57,4 +64,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc new file mode 100644 index 000000000..ea3c1e7cd --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ElementwiseBinaryLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/LogicalOr.h> +#include <cker/operation/MaxMin.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +template <typename T> +void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalOrBroadcast<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + } + else + { + nnfw::cker::LogicalOrElementwise<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer())); + } +} + +template <typename T> +void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) +{ + nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), + getTensorShape(output), reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) +{ + nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), + getTensorShape(output), reinterpret_cast<T *>(output->buffer())); +} + +bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs, + const IPortableTensor *output) +{ + return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) && + (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset()); +} +} // namespace + +void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, const ElementwiseBinaryType op_type) +{ + assert(lhs != nullptr); + assert(rhs != nullptr); + assert(output != nullptr); + + _lhs = lhs; + _rhs = rhs; + _output = output; + + switch (op_type) + { + case ElementwiseBinaryType::kLogicalOr: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalOrGeneric<bool>; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; + case ElementwiseBinaryType::kMax: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + if (!haveSameQuantInfo(_lhs, _rhs, _output)) + { + throw std::runtime_error("Max NYI for quantized"); + } + _kernel = maximumGeneric<uint8_t>; + } + else if (_lhs->data_type() == OperandType::FLOAT32) + { + _kernel = maximumGeneric<float>; + } + else + { + throw std::runtime_error{"Max: unsupported data type"}; + } + break; + case ElementwiseBinaryType::kMin: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + if (!haveSameQuantInfo(_lhs, _rhs, _output)) + { + throw std::runtime_error("Min NYI for quantized"); + } + _kernel = minimumGeneric<uint8_t>; + } + else if (_lhs->data_type() == OperandType::INT32) + { + _kernel = minimumGeneric<int32_t>; + } + else if (_lhs->data_type() == OperandType::FLOAT32) + { + _kernel = minimumGeneric<float>; + } + else + { + throw std::runtime_error{"Min: unsupported data type"}; + } + break; + default: + throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"}; + } +} + +void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h index ed8dc5b0f..052747a4c 100644 --- a/runtime/onert/backend/cpu/ops/MaxLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,20 +30,25 @@ namespace cpu namespace ops { -class MaxLayer : public ::onert::exec::IFunction +enum class ElementwiseBinaryType +{ + kLogicalAnd, + kLogicalOr, + kMax, + kMin, +}; + +class ElementwiseBinaryLayer : public ::onert::exec::IFunction { public: - MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) + ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) { // DO NOTHING } public: - template <typename T> void maximum(); - - void maxQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output); + void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, + const ElementwiseBinaryType op_type); void run() override; @@ -51,6 +56,7 @@ private: const IPortableTensor *_lhs; const IPortableTensor *_rhs; IPortableTensor *_output; + std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -58,4 +64,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc new file mode 100644 index 000000000..f8f89ab15 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
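ElementwiseBinaryLayer above applies the same configure-time dispatch to Max, Min, and LogicalOr (kLogicalAnd is declared in the enum but has no kernel bound yet, so it falls into the default throw). Note that the quantized Max/Min paths require identical scale and zero point across both inputs and the output (haveSameQuantInfo), because cker's Max/Min compare raw uint8 values; mismatched quantization parameters still throw NYI. A usage sketch:

  ElementwiseBinaryLayer layer;
  layer.configure(lhs, rhs, output, ElementwiseBinaryType::kMax); // float inputs bind maximumGeneric<float>
  layer.run();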
+ */ + +#include "ElementwiseUnaryLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Elementwise.h> +#include <cker/operation/Erf.h> +#include <cker/operation/Exp.h> +#include <cker/operation/LogicalNot.h> +#include <cker/operation/Quantize.h> +#include <cker/operation/Round.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +void absFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename FromT> +void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out) +{ + switch (data_type_out) + { + case ir::DataType::FLOAT32: + std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); }); + return; + case ir::DataType::INT32: + std::transform(in, in + num_elements, out.i32, + [](FromT a) { return static_cast<int32_t>(a); }); + return; + case ir::DataType::UINT32: + std::transform(in, in + num_elements, out.u32, + [](FromT a) { return static_cast<uint32_t>(a); }); + return; + case ir::DataType::UINT8: + std::transform(in, in + num_elements, out.u8, + [](FromT a) { return static_cast<uint8_t>(a); }); + return; + case ir::DataType::BOOL8: + std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); }); + return; + case ir::DataType::INT64: + std::transform(in, in + num_elements, out.i64, + [](FromT a) { return static_cast<int64_t>(a); }); + return; + default: + throw std::runtime_error("Cast: Not supported output type " + + std::to_string((int)data_type_out)); + } +} + +void cast(const IPortableTensor *input, IPortableTensor *output) +{ + auto input_buf = input->buffer(); + auto output_buf = output->buffer(); + const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); + auto out = *reinterpret_cast<DataPtr *>(&output_buf); + + auto input_shape = getTensorShape(input); + auto output_shape = getTensorShape(output); + const auto num_elements = MatchingFlatSize(input_shape, output_shape); + + switch (input->data_type()) + { + case ir::DataType::FLOAT32: + castPtr(in.f, out, num_elements, output->data_type()); + return; + case ir::DataType::INT32: + castPtr(in.i32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT32: + castPtr(in.u32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT8: + castPtr(in.u8, out, num_elements, output->data_type()); + return; + case ir::DataType::BOOL8: + castPtr(in.b, out, num_elements, output->data_type()); + return; + case ir::DataType::INT64: + castPtr(in.i64, out, num_elements, output->data_type()); + return; + default: + throw std::runtime_error("Cast: unsupported data type " + + std::to_string((int)input->data_type())); + } +} + +void cosFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void expFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void erfFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), +
getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void logFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void logicalNot(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()), + getTensorShape(output), reinterpret_cast<bool *>(output->buffer())); +} + +void negFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename InputT, typename OutputT> +void affineQuantize(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()), + getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()), + output->data_scale(), output->data_offset()); +} + +void roundFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void sinFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + if (!HaveSameShapes(input, output)) + throw std::runtime_error{"ZerosLike: input and output shape don't match."}; + + auto element_size = getTensorShape(input).FlatSize(); + + memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T)); +} +} // namespace + +void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output, + const ElementwiseUnaryType op_type) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; + + switch (op_type) + { + case ElementwiseUnaryType::kAbs: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = absFloat32; + } + else + { + throw std::runtime_error{"Abs: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kCast: + _kernel = cast; + break; + case ElementwiseUnaryType::kCos: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = cosFloat32; + } + else + { + throw std::runtime_error{"Cos: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kExp: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = expFloat32; + } + else + { + throw std::runtime_error{"Exp: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kErf: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = erfFloat32; + } + else + { + throw std::runtime_error{"Erf: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kLog: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = logFloat32; + } + else + { + throw
std::runtime_error{"Log: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kLogicalNot: + if ((input->data_type() == OperandType::BOOL8)) + { + _kernel = logicalNot; + } + else + { + throw std::runtime_error{"LogicalNot: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kNeg: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = negFloat32; + } + else + { + throw std::runtime_error{"Neg: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kQuantize: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = affineQuantize<float, uint8_t>; + } + else + { + throw std::runtime_error{"Quantize: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kRound: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = roundFloat32; + } + else + { + throw std::runtime_error{"Round: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kRSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = rsqrtFloat32; + } + else + { + throw std::runtime_error{"RSqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSin: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sinFloat32; + } + else + { + throw std::runtime_error{"Sin: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kZerosLike: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = zerosLikeFloat32<float>; + } + else if (input->data_type() == OperandType::INT32) + { + _kernel = zerosLikeFloat32<int32_t>; + } + else + { + throw std::runtime_error{"ZerosLike: Unsupported data type"}; + } + break; + default: + throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"}; + } +} + +void ElementwiseUnaryLayer::run() { _kernel(_input, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index 994d17a30..74968386d 100644 --- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,23 +30,41 @@ namespace cpu namespace ops { -class ReLU6Layer : public ::onert::exec::IFunction +enum class ElementwiseUnaryType { -public: - ReLU6Layer(); + kAbs, + kCast, + kCos, + kErf, + kExp, + kLog, + kLogicalNot, + kNeg, + kQuantize, + kRound, + kRSqrt, + kSin, + kZerosLike +}; +class ElementwiseUnaryLayer : public ::onert::exec::IFunction +{ public: - void relu6Float32(); + ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel() + { + // DO NOTHING + } - void relu6Quant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); +public: + void configure(const IPortableTensor *input, IPortableTensor *output, + const ElementwiseUnaryType op_type); void run() override; private: const IPortableTensor *_input; IPortableTensor *_output; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -54,4 +72,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc deleted file mode 100644 index 4dbec9cd5..000000000 --- a/runtime/onert/backend/cpu/ops/ExpLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ExpLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Exp.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ExpLayer::expFloat32() -{ - nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ExpLayer::expQuant8() -{ - // cker quant8 exp is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ExpLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - expFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - expQuant8(); - } - else - { - throw std::runtime_error{"Exp: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc deleted file mode 100644 index 307c15bc4..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogLayer::LogLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogLayer::logFloat32() -{ - nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; } - -void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logQuant8(); - } - else - { - throw std::runtime_error{"Log: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h deleted file mode 100644 index 2f6b4b570..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogLayer : public ::onert::exec::IFunction -{ -public: - LogLayer(); - -public: - void logFloat32(); - - void logQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc index 06dde4fc4..1d7ee6caa 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc @@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0. 
// DO NOTHING } +void LogSoftMaxLayer::PopulateLookupTable(const float kBeta) +{ + const float scale = -_input->data_scale() * kBeta; + const int32_t max_uint8 = std::numeric_limits<uint8_t>::max(); + for (int32_t val = 0; val <= max_uint8; ++val) + { + _table[max_uint8 - val] = expf(scale * val); + } +} + void LogSoftMaxLayer::logsoftmaxFloat32() { nnfw::cker::SoftmaxParams op_params; @@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32() void LogSoftMaxLayer::logsoftmaxQuant8() { - // NYI + nnfw::cker::SoftmaxParams op_params; + op_params.beta = _beta; + op_params.axis = _axis; + op_params.table = _table; + op_params.zero_point = _output->data_offset(); + op_params.scale = _output->data_scale(); + nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input), + reinterpret_cast<const uint8_t *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); } void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis, @@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, _output = output; _beta = beta; _axis = axis; + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(_beta); + } } void LogSoftMaxLayer::run() @@ -66,7 +88,7 @@ void LogSoftMaxLayer::run() } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - throw std::runtime_error{"LogSoftmax : NYI"}; + logsoftmaxQuant8(); } else { diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h index ba9deca17..1533f3361 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h @@ -45,12 +45,15 @@ public: void run(); + void PopulateLookupTable(const float kBeta); + private: const IPortableTensor *_input; IPortableTensor *_output; float _beta; int _axis; + float _table[256]; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc deleted file mode 100644 index f2192c148..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
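PopulateLookupTable above fills _table so that _table[255 - d] == expf(-beta * input_scale * d) for every possible quantized distance d from a row's maximum; the quantized log-softmax can then sum exponentials by table lookup alone. A sketch of how such a table is typically consumed (the actual loop lives in nnfw::cker::LogSoftmax):

    #include <algorithm>
    #include <cstdint>

    // Sum exp(-beta * scale * (max - x_i)) over a row using the 256-entry table,
    // where table[255 - d] holds expf(-beta * scale * d) as built above.
    float sumExp(const uint8_t *row, int len, const float table[256])
    {
      uint8_t max_val = 0;
      for (int i = 0; i < len; ++i)
        max_val = std::max(max_val, row[i]);
      float sum = 0.f;
      for (int i = 0; i < len; ++i)
        sum += table[255 - (max_val - row[i])];
      return sum;
    }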
- */ - -#include "LogicalNotLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalNot.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogicalNotLayer::logicalNotBool8() -{ - nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer())); -} - -void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogicalNotLayer::run() -{ - if (_input->data_type() == OperandType::BOOL8) - { - logicalNotBool8(); - } - else - { - throw std::runtime_error{"LogicalNot: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h deleted file mode 100644 index 5543cca3d..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogicalNotLayer : public ::onert::exec::IFunction -{ -public: - LogicalNotLayer(); - -public: - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void logicalNotBool8(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc deleted file mode 100644 index 5b7c9f6f0..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "LogicalOrLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalOr.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -void LogicalOrLayer::lorBool8() -{ - if (!HaveSameShapes(_lhs, _rhs)) - { - nnfw::cker::LogicalOrBroadcast<bool>( - getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs), - reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output), - reinterpret_cast<bool *>(_output->buffer())); - } - else - { - nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs), - reinterpret_cast<const bool *>(_lhs->buffer()), - reinterpret_cast<const bool *>(_rhs->buffer()), - reinterpret_cast<bool *>(_output->buffer())); - } -} - -void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void LogicalOrLayer::run() -{ - if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) - { - lorBool8(); - } - else - { - throw std::runtime_error{"LogicalOr: Unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h deleted file mode 100644 index efaf396e8..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class LogicalOrLayer : public ::onert::exec::IFunction -{ -public: - LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // Nothing - } - -public: - void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output); - - void run() override; - -private: - void lorBool8(); - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc deleted file mode 100644 index 140ab4d2c..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogisticLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Logistic.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogisticLayer::populateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = 1.0f / (1.0f + std::exp(-dequantized)); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void LogisticLayer::logisticFloat32() -{ - nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogisticLayer::logisticQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - if (_output->data_scale() != 1.f / 256) - { - throw std::runtime_error{"incorrect scale for output"}; - } - populateLookupTable(); - } -} - -void LogisticLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logisticFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logisticQuant8(); - } - else - { - throw std::runtime_error{"Logistic: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h deleted file mode 100644 index cac77939d..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogisticLayer : public ::onert::exec::IFunction -{ -public: - LogisticLayer(); - -public: - void logisticFloat32(); - - void logisticQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - void populateLookupTable(); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint8_t _table[256]; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc deleted file mode 100644 index 9631983be..000000000 --- a/runtime/onert/backend/cpu/ops/MaxLayer.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
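The deleted LogisticLayer built its uint8 sigmoid the way the surviving table-based kernels do: enumerate all 256 input codes once, dequantize, apply the function, requantize, and clamp. A self-contained worked instance; the input scale and zero point are made-up values, the 1/256 output scale is the one configure() enforced, and the output zero point is assumed to be 0:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
      const float in_scale = 0.1f;        // made-up input quantization scale
      const int32_t in_zero = 128;        // made-up input zero point
      const float out_scale = 1.f / 256;  // scale required by LogisticLayer::configure
      const int32_t out_zero = 0;         // assumed output zero point
      uint8_t table[256];
      for (int32_t v = 0; v <= 255; ++v)
      {
        const float x = in_scale * (v - in_zero);    // dequantize
        const float s = 1.f / (1.f + std::exp(-x));  // sigmoid
        const int32_t q = static_cast<int32_t>(std::round(s / out_scale)) + out_zero;
        table[v] = static_cast<uint8_t>(std::min(255, std::max(0, q)));
      }
      std::printf("sigmoid(0) -> %u (expect 128)\n", table[128]);  // 0.5 / (1/256) = 128
      return 0;
    }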
- */ - -#include "MaxLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MaxLayer::maximum() -{ - nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MaxLayer::maxQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Max<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Max NYI for quantized"); -} - -void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MaxLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - maximum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxQuant8(); - } - else - { - throw std::runtime_error{"Max: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc deleted file mode 100644 index 1e983b408..000000000 --- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "MaxPoolLayer.h" - -#include <cker/operation/MaxPool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define MAXPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -MaxPoolLayer::MaxPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void MaxPoolLayer::maxPoolFloat32() -{ - MAXPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<float *>(_output->buffer())); -} -void MaxPoolLayer::maxPoolQuant8() -{ - MAXPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void MaxPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - maxPoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxPoolQuant8(); - } - else - { - throw std::runtime_error{"MaxPool: unsupported data type"}; - } -} - -#undef MAXPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc deleted file mode 100644 index 20859673b..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MinLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MinLayer::minimum() -{ - nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MinLayer::minQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Min<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Min NYI for quantized"); -} - -void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MinLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - minimum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - minQuant8(); - } - else if (_lhs->data_type() == OperandType::INT32) - { - minimum<int32_t>(); - } - else - { - throw std::runtime_error{"Min: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h deleted file mode 100644 index 9bd114e54..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MinLayer : public ::onert::exec::IFunction -{ -public: - MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - template <typename T> void minimum(); - - void minQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc deleted file mode 100644 index eef73edf3..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MulLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void MulLayer::mulFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void MulLayer::mulQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - - double 
real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale(); - QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void MulLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - mulFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - mulQuant8(); - } - else - { - throw std::runtime_error{"Mul: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h deleted file mode 100644 index 2c4a98875..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
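mulQuant8 above collapses the three tensor scales into one real multiplier, lhs_scale * rhs_scale / output_scale, and QuantizeMultiplier splits it into a Q31 integer multiplier plus a power-of-two shift so the inner loop stays integer-only. A simplified version of that decomposition; the real cker helper also handles zero and other edge cases:

    #include <cmath>
    #include <cstdint>

    // Split m > 0 into q * 2^shift, with q a Q31 fixed-point value in [2^30, 2^31).
    void quantizeMultiplier(double m, int32_t *q, int *shift)
    {
      const double frac = std::frexp(m, shift);  // m == frac * 2^shift, frac in [0.5, 1)
      int64_t q64 = static_cast<int64_t>(std::round(frac * (1ll << 31)));
      if (q64 == (1ll << 31))  // frac rounded all the way up to 1.0
      {
        q64 /= 2;
        ++*shift;
      }
      *q = static_cast<int32_t>(q64);
    }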
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MulLayer : public ::onert::exec::IFunction -{ -public: - MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void mulFloat32(); - - void mulQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc deleted file mode 100644 index 2cb95b771..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "NegLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -NegLayer::NegLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void NegLayer::negFloat32() -{ - nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; } - -void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void NegLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - negFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - negQuant8(); - } - else - { - throw std::runtime_error{"Neg: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h deleted file mode 100644 index addf84ec2..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class NegLayer : public ::onert::exec::IFunction -{ -public: - NegLayer(); - -public: - void negFloat32(); - - void negQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc new file mode 100644 index 000000000..85d02a751 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PoolLayer.h" + +#include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +template <typename T> +void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::AveragePool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::MaxPool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +std::function<void(const IPortableTensor *, IPortableTensor *)> +generateKernelGeneric(const nnfw::cker::PoolParams &params, PoolType op_type) +{ + if (op_type == PoolType::kAvg) + { + return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else if (op_type == PoolType::kMax) + { + return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else + { + throw std::runtime_error{"Pool: unsupported pool type"}; + } +} +} // namespace + +PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +#define POOLING_PARAMETERS \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = strideHeight; \ + op_params.stride_width = strideWidth; \ + op_params.filter_height = kernelHeight; \ + op_params.filter_width = kernelWidth; \ + op_params.padding_values.height = (int8_t)paddingTop; \ + op_params.padding_values.width = (int8_t)paddingLeft; + +void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t, + const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, + IPortableTensor *output, const PoolType op_type) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; + + POOLING_PARAMETERS + if (_input->data_type() == OperandType::FLOAT32) + { + float output_activation_min = 0; + float output_activation_max = 0; + CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + _kernel = generateKernelGeneric<float>(op_params, op_type); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + _kernel = generateKernelGeneric<uint8_t>(op_params, op_type); + } + else + { + throw std::runtime_error{"Pool: unsupported data type"}; + } +} + +void PoolLayer::run() { _kernel(_input, _output); } + +#undef POOLING_PARAMETERS + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h index 4c5109f64..b37835946 100644 ---
a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h +++ b/runtime/onert/backend/cpu/ops/PoolLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ #include <backend/IPortableTensor.h> #include "OperationUtils.h" @@ -31,22 +31,25 @@ namespace cpu namespace ops { -class MaxPoolLayer : public ::onert::exec::IFunction +enum class PoolType { -public: - MaxPoolLayer(); + kAvg, + kL2, + kMax, +}; +class PoolLayer : public ::onert::exec::IFunction +{ public: - void maxPoolFloat32(); - - void maxPoolQuant8(); + PoolLayer(); +public: void configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t kernelWidth, const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const PoolType op_type); void run() override; @@ -54,17 +57,7 @@ private: const IPortableTensor *_input; IPortableTensor *_output; - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -72,4 +65,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc deleted file mode 100644 index 45fc148bf..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
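generateKernelGeneric in the new PoolLayer.cc freezes the computed PoolParams into the stored kernel with std::bind; a value-capturing lambda expresses the same design and is the more common modern idiom. A sketch with stand-in types:

    #include <functional>

    struct PoolParams { int stride_w = 1, stride_h = 1; /* ... */ };
    struct Tensor {};

    template <typename T> void maxPool(const PoolParams &, const Tensor *, Tensor *)
    {
      // pooling over the buffers (omitted)
    }

    std::function<void(const Tensor *, Tensor *)> makeKernel(const PoolParams &params)
    {
      // Capturing params by value fixes them at configure() time, exactly like
      // std::bind(&maxPool2D<T>, params, _1, _2) in the layer above.
      return [params](const Tensor *in, Tensor *out) { maxPool<float>(params, in, out); };
    }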
- */ - -#include "QuantizeLayer.h" - -#include <cker/operation/Quantize.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize() -{ - nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()), - _output->data_scale(), _output->data_offset()); -} - -void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void QuantizeLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - affineQuantize<float, uint8_t>(); - } - else - { - throw std::runtime_error{"Quantize: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h deleted file mode 100644 index b4e7aca40..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class QuantizeLayer : public ::onert::exec::IFunction -{ -public: - QuantizeLayer(); - -public: - template <typename InputT, typename OutputT> void affineQuantize(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc index 185d7554e..4690bdf72 100644 --- a/runtime/onert/backend/cpu/ops/RoundLayer.cc +++ b/runtime/onert/backend/cpu/ops/RankLayer.cc @@ -14,12 +14,10 @@ * limitations under the License. 
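The deleted QuantizeLayer was a thin wrapper over cker's affine quantizer and only supported float32 to uint8. For reference, the transform it applied is q = clamp(round(x / scale) + zero_point, 0, 255); a self-contained sketch of that math (my own reference code, not the cker implementation):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Affine (asymmetric) quantization of one float to uint8, given the output
// tensor's scale and zero point - the operation QuantizeLayer delegated to cker.
uint8_t affineQuantize(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
  // scale = 0.5, zero point = 128: 3.0 / 0.5 + 128 = 134
  std::cout << static_cast<int>(affineQuantize(3.0f, 0.5f, 128)) << '\n';
}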
*/ -#include "RoundLayer.h" +#include "RankLayer.h" #include "OperationUtils.h" -#include <cker/operation/Round.h> - namespace onert { namespace backend @@ -28,32 +26,28 @@ namespace cpu { namespace ops { -RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} -void RoundLayer::roundFloat32() +RankLayer::RankLayer() : _input(nullptr), _output(nullptr) { - nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + // DO NOTHING } -void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output) +void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; _output = output; } -void RoundLayer::run() +void RankLayer::run() { - if (_input->data_type() == OperandType::FLOAT32) + if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32) { - roundFloat32(); + int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer()); + output_data[0] = _input->num_dimensions(); } else { - throw std::runtime_error{"Round: unsupported data type"}; + throw std::runtime_error{"Rank : unsupported data type"}; } } diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h index 054894203..6282ceb07 100644 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h +++ b/runtime/onert/backend/cpu/ops/RankLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ #include <backend/IPortableTensor.h> @@ -29,11 +29,13 @@ namespace cpu { namespace ops { -class ZerosLikeLayer : public ::onert::exec::IFunction + +class RankLayer : public ::onert::exec::IFunction { public: - ZerosLikeLayer(); + RankLayer(); +public: void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; @@ -48,4 +50,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc deleted file mode 100644 index 26eb35e0d..000000000 --- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ReLU6Layer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU6.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLU6Layer::relu6Float32() -{ - nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - reinterpret_cast<float *>(_output->buffer())); -} - -void ReLU6Layer::relu6Quant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLU6Layer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - relu6Float32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - relu6Quant8(); - } - else - { - throw std::runtime_error{"ReLU6: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc deleted file mode 100644 index cb4529feb..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ReLULayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLULayer::reluFloat32() -{ - nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ReLULayer::reluQuant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLULayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - reluFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - reluQuant8(); - } - else - { - throw std::runtime_error{"ReLU: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/runtime/onert/backend/cpu/ops/ReLULayer.h deleted file mode 100644 index 4ba2be772..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
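ReLULayer and ReLU6Layer are deleted here with no replacement in this directory; judging from the visitor lists later in this diff (ElementwiseActivation in StaticShapeInference/DynamicShapeInference), these activations appear to have been folded into a generic elementwise-activation op. Both are clamps, which is what makes that consolidation natural; a sketch:

#include <algorithm>
#include <iostream>
#include <limits>

// relu(x) = max(x, 0); relu6(x) = min(max(x, 0), 6).
// One clamp with configurable bounds covers both.
float clampActivation(float x, float lo, float hi) { return std::min(hi, std::max(lo, x)); }

int main()
{
  const float inf = std::numeric_limits<float>::infinity();
  std::cout << clampActivation(7.5f, 0.0f, 6.0f) << '\n'; // ReLU6: prints 6
  std::cout << clampActivation(-1.5f, 0.0f, inf) << '\n'; // ReLU:  prints 0
}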
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class ReLULayer : public ::onert::exec::IFunction -{ -public: - ReLULayer(); - -public: - void reluFloat32(); - - void reluQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc index fe22dbed7..bb5f85d60 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc @@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std: } template <typename T> -void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, - bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kSum: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel, - [](const T current, const T in) -> T { return in + current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel, + [](const T current, const T in) -> T { return in + current; }); break; case ReduceType::kProd: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel, - [](const T current, const T in) -> T { return in * current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel, + [](const T current, const T in) -> T { return in * current; }); break; case ReduceType::kMax: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, [](const T current, const T in) -> T { return (in > current) ? in : current; }); break; case ReduceType::kMin: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::max(), reduce_kernel, [](const T current, const T in) -> T { return (in < current) ? 
in : current; }); break; default: @@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std:: // Template specialization for bool type template <> -void evalType<bool>(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel, - ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kAny: - return evalLogic<bool>( - input, output, axes, keep_dims, false, reduce_kernel, - [](const bool current, const bool in) -> bool { return in || current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, false, reduce_kernel, + [](const bool current, const bool in) -> bool { return in || current; }); break; case ReduceType::kAll: - return evalLogic<bool>( - input, output, axes, keep_dims, true, reduce_kernel, - [](const bool current, const bool in) -> bool { return in && current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, true, reduce_kernel, + [](const bool current, const bool in) -> bool { return in && current; }); break; default: throw std::runtime_error{"Reduce: Unsupported reduce type"}; } } -template <ReduceType reduce_type> -void evalGeneric(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +generateKernelGeneric(const IPortableTensor *input, bool keep_dims, + nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (input->data_type()) { case OperandType::FLOAT32: - return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<float>(keep_dims, reduce_kernel, reduce_type); case OperandType::INT32: - return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type); case OperandType::BOOL8: - return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<bool>(keep_dims, reduce_kernel, reduce_type); default: throw std::runtime_error{"Reduce(generic): unsupported data type"}; } } +// TODO Refine this function void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) @@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, return; } - evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel); + const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum); + kernel(input, output, axes); } } // namespace ReduceLayer::ReduceLayer() - : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny), - _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce()) + : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()), + _kernel() { // DO NOTHING } @@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor _input = input; _axes = axes; _output = output; - _reduceType = reduceType; - _keep_dims = keep_dims; -} -void 
ReduceLayer::run() -{ - const auto axes = getReducerAxes(_axes); - switch (_reduceType) + switch (reduceType) { case ReduceType::kSum: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, *_reduce_kernel); return; } - evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum); break; case ReduceType::kProd: - evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd); break; case ReduceType::kMax: - evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax); break; case ReduceType::kMin: - evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin); break; case ReduceType::kAny: - evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny); break; case ReduceType::kAll: - evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll); break; default: throw std::runtime_error{"Reduce: Unsupported reduce type"}; } } +void ReduceLayer::run() +{ + const auto axes = getReducerAxes(_axes); + _kernel(_input, _output, axes); +} + } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h index 8e7bcdb07..332d399bd 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.h +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h @@ -65,10 +65,11 @@ private: const IPortableTensor *_input; const IPortableTensor *_axes; IPortableTensor *_output; - ReduceType _reduceType; - bool _keep_dims; std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel; + std::function<void(const IPortableTensor *input, IPortableTensor *output, + const std::vector<int> &axes)> + _kernel; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h deleted file mode 100644 index fc6a46c0d..000000000 --- a/runtime/onert/backend/cpu/ops/RoundLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
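ReduceLayer gets the same treatment as PoolLayer: configure() resolves the reduce type and keep_dims once into a stored std::function, and run() only fetches the (possibly dynamic) axes and invokes it, keeping the type switch out of the per-inference path. A minimal sketch of that configure/run split (illustrative names only):

#include <functional>
#include <iostream>
#include <vector>

// configure() time: choose the reduction once.
std::function<int(const std::vector<int> &)> makeKernel(bool use_max)
{
  if (use_max)
    return [](const std::vector<int> &v) {
      int m = v.front();
      for (int x : v)
        m = x > m ? x : m;
      return m;
    };
  return [](const std::vector<int> &v) {
    int s = 0;
    for (int x : v)
      s += x;
    return s;
  };
}

int main()
{
  auto kernel = makeKernel(false);        // one-time dispatch
  std::cout << kernel({1, 2, 3}) << '\n'; // run(): prints 6
  std::cout << kernel({4, 5}) << '\n';    // run(): prints 9
}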
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RoundLayer : public ::onert::exec::IFunction -{ -public: - RoundLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void roundFloat32(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc deleted file mode 100644 index 0bd468f96..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "RsqrtLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void RsqrtLayer::rsqrtFloat32() -{ - nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI: QASYMM8 not supported"}; } - -void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void RsqrtLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - rsqrtFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - rsqrtQuant8(); - } - else - { - throw std::runtime_error{"Rsqrt: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h deleted file mode 100644 index 49abbb08d..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RsqrtLayer : public ::onert::exec::IFunction -{ -public: - RsqrtLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void rsqrtFloat32(); - void rsqrtQuant8(); - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc deleted file mode 100644 index 2a6b11753..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SinLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -SinLayer::SinLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void SinLayer::sinFloat32() -{ - nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; } - -void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void SinLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - sinFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - sinQuant8(); - } - else - { - throw std::runtime_error{"Sin: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h deleted file mode 100644 index 348350f41..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class SinLayer : public ::onert::exec::IFunction -{ -public: - SinLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void sinFloat32(); - void sinQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc index 6e2bb584a..095e67abc 100644 --- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc @@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0) // DO NOTHING } -// Performs softmax along the input of size (input_size * batch_size). -void Softmax(const float *in, const int input_size, const int batch_size, const float beta, - float *out) +void SoftMaxLayer::softmaxFloat32() { - assert(input_size > 0); - - // For each batch - for (int b = 0; b < batch_size; b++) + if (getNumberOfDimensions(_input) == 1) { - // Find the max coeff. - float max_coeff = in[0]; - for (int i = 1; i < input_size; i++) - { - if (in[i] > max_coeff) - max_coeff = in[i]; - } - - // Compute the normalized sum of exps. - float exp_sum = 0.0; - for (int i = 0; i < input_size; i++) - { - out[i] = std::exp((in[i] - max_coeff) * beta); - exp_sum += out[i]; - } - - // Divide by the sum of exps. - float reciprocal_sum_exp = 1.f / exp_sum; - for (int i = 0; i < input_size; i++) - { - out[i] *= reciprocal_sum_exp; - } - - // Advance in and out pointers for the next batch. - in += input_size; - out += input_size; + uint32_t input_size = getNumberOfElements(_input); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta, + reinterpret_cast<float *>(_output->buffer())); } -} - -void SoftMaxLayer::softmaxFloat32() -{ - if (getNumberOfDimensions(_input) == 2) + else if (getNumberOfDimensions(_input) == 2) { uint32_t batch_size = getSizeOfDimension(_input, 0); if (batch_size == 0) throw std::runtime_error("batch_size should not be 0"); uint32_t input_size = getNumberOfElements(_input) / batch_size; - Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta, - reinterpret_cast<float *>(_output->buffer())); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, + _beta, reinterpret_cast<float *>(_output->buffer())); } else if (getNumberOfDimensions(_input) == 4) { @@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32() } else { - throw std::runtime_error{"only 2D and 4D tensors supported"}; + throw std::runtime_error{"only 1D, 2D and 4D tensors supported"}; } } diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc deleted file mode 100644 index 597d52952..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
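SoftMaxLayer now forwards the 1D and 2D float cases to nnfw::cker::Softmax instead of the hand-rolled loop deleted above (a 1D input is treated as a single batch). The deleted loop is the standard numerically stable softmax: subtract the per-batch max before exponentiating so std::exp cannot overflow, then normalize by the sum. A compact standalone version of that reference algorithm:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// softmax_i = exp((x_i - max) * beta) / sum_j exp((x_j - max) * beta)
std::vector<float> softmax(const std::vector<float> &in, float beta)
{
  const float max_coeff = *std::max_element(in.begin(), in.end());
  std::vector<float> out(in.size());
  float exp_sum = 0.0f;
  for (size_t i = 0; i < in.size(); ++i)
  {
    out[i] = std::exp((in[i] - max_coeff) * beta);
    exp_sum += out[i];
  }
  for (float &v : out)
    v /= exp_sum; // normalize
  return out;
}

int main()
{
  for (float v : softmax({1.0f, 2.0f, 3.0f}, 1.0f))
    std::cout << v << ' '; // ~0.090 0.245 0.665
  std::cout << '\n';
}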
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SubLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void SubLayer::subFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SubLayer::subInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void SubLayer::subQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - 
assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - op_params.input2_multiplier *= -1; - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void SubLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - subFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - subQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - subInt32(); - } - else - { - throw std::runtime_error{"Sub: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h deleted file mode 100644 index 86f32ca6d..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
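The deleted subQuant8 follows the usual TFLite-style integer arithmetic recipe: both uint8 inputs are rescaled to a shared scale (twice the larger input scale) using 32-bit fixed-point multipliers, subtracted, then rescaled to the output scale; left_shift = 20 preserves precision in the intermediate sums, and negating input2's multiplier (op_params.input2_multiplier *= -1) turns the shared quantized-add path into a subtraction. QuantizeMultiplier and friends are onert/cker internals, but the float computation the fixed-point path approximates is simple to state:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Float-domain reference for quantized subtraction: dequantize both inputs,
// subtract, requantize. subQuant8's fixed-point code approximates exactly this.
uint8_t subQuantRef(uint8_t a, float a_scale, int32_t a_zp, uint8_t b, float b_scale, int32_t b_zp,
                    float out_scale, int32_t out_zp)
{
  const float real = a_scale * (a - a_zp) - b_scale * (b - b_zp);
  const int32_t q = static_cast<int32_t>(std::round(real / out_scale)) + out_zp;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
  // 0.5 - 0.25 = 0.25 in real values; requantized with scale 0.0625 and
  // zero point 128: 0.25 / 0.0625 + 128 = 132
  std::cout << static_cast<int>(subQuantRef(130, 0.25f, 128, 129, 0.25f, 128, 0.0625f, 128))
            << '\n';
}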
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class SubLayer : public ::onert::exec::IFunction -{ -public: - SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void subFloat32(); - - void subQuant8(); - - void subInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc deleted file mode 100644 index 910ac1f41..000000000 --- a/runtime/onert/backend/cpu/ops/TanhLayer.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TanhLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Tanh.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void TanhLayer::PopulateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = std::tanh(dequantized); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void TanhLayer::tanhFloat32() -{ - nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void TanhLayer::tanhQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - PopulateLookupTable(); - } -} - -void TanhLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - tanhFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - tanhQuant8(); - } - else - { - throw std::runtime_error{"Tanh: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc deleted file mode 100644 index ae8084518..000000000 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
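The deleted quantized tanh path builds a 256-entry lookup table once in configure(): every possible uint8 input value is dequantized, run through std::tanh, and requantized, so run() is a single table lookup per element. A standalone sketch of the same LUT construction (the quantization parameters below are illustrative, not taken from the diff):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
  const float in_scale = 0.05f;
  const int32_t in_zp = 128;
  const float out_scale = 1.0f / 128.0f; // tanh outputs lie in [-1, 1]
  const int32_t out_zp = 128;

  uint8_t table[256];
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = in_scale * (val - in_zp);
    const float transformed = std::tanh(dequantized);
    const int32_t quantized = static_cast<int32_t>(std::round(transformed / out_scale)) + out_zp;
    table[val] = static_cast<uint8_t>(std::min(255, std::max(0, quantized)));
  }

  // run() then reduces to: output[i] = table[input[i]];
  std::cout << static_cast<int>(table[128]) << '\n'; // tanh(0) maps to the zero point, 128
}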
- */ - -#include "ZerosLikeLayer.h" - -#include "OperationUtils.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ZerosLikeLayer::run() -{ - if (!HaveSameShapes(_input, _output)) - throw std::runtime_error{"ZerosLike: input and output shape don't match."}; - - auto element_size = getTensorShape(_input).FlatSize(); - - switch (_input->data_type()) - { - case OperandType::FLOAT32: - memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float)); - break; - case OperandType::INT32: - memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t)); - break; - default: - throw std::runtime_error{"ZerosLike: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h index c263aef2b..1eba29550 100644 --- a/runtime/onert/core/include/backend/BackendContext.h +++ b/runtime/onert/core/include/backend/BackendContext.h @@ -29,6 +29,7 @@ class Backend; class IConstantInitializer; class IKernelGenerator; class ITensorRegister; +struct ITensorRegistry; struct ITensorBuilder; struct IOptimizer; @@ -45,14 +46,15 @@ public: public: BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, std::shared_ptr<ITensorRegister> tensor_register = nullptr, std::shared_ptr<IOptimizer> optimizer = nullptr) - : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder}, - constant_initializer{constant_initializer}, kernel_gen{kernel_gen}, - tensor_register{tensor_register}, optimizer{optimizer} + : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}, + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer} { } @@ -74,6 +76,7 @@ private: std::vector<ir::OperandIndex> _operand_list; public: + std::shared_ptr<ITensorRegistry> tensor_registry; std::shared_ptr<ITensorBuilder> tensor_builder; std::shared_ptr<IConstantInitializer> constant_initializer; std::shared_ptr<IKernelGenerator> kernel_gen; diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/IConstantInitializer.h index f322015ba..149acecb4 100644 --- a/runtime/onert/core/include/backend/IConstantInitializer.h +++ b/runtime/onert/core/include/backend/IConstantInitializer.h @@ -162,14 +162,14 @@ public: public: void run() { - assert(tensor_builder().get()); + assert(tensor_registry()); for (const auto &it : _init_map) { const auto &ind = it.first; const auto &fn = it.second; const auto &model_obj = _operands.at(ind); - auto tensor_obj = tensor_builder()->tensorAt(ind); + auto tensor_obj = tensor_registry()->getNativeITensor(ind); assert(tensor_obj != nullptr); fn(model_obj, *tensor_obj); VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl; @@ -189,10 +189,7 @@ public: void setLayout(ir::Layout layout) { _current_op_seq_layout = 
layout; } protected: - using OperationVisitor::visit; - -protected: - virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0; + virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0; public: virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h index b760cda0e..f93ab81ae 100644 --- a/runtime/onert/core/include/backend/ITensorBuilder.h +++ b/runtime/onert/core/include/backend/ITensorBuilder.h @@ -40,11 +40,6 @@ struct ITensorBuilder virtual ~ITensorBuilder(void) = default; /** - * @brief Returns true if this TensorBuilder support dynamic tensor - */ - virtual bool supportDynamicTensor() = 0; - - /** * @brief Register tensor information to allocate on backend * * @param ind Index * @param info Information * @param backend_layout Backend layout * @param as_const Whether this tensor is constant */ virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout) = 0; /** * @brief Check if the tensor has been registered with @c registerTensorInfo * * @return true If the tensor has been registered * @return false Otherwise */ virtual bool isRegistered(const ir::OperandIndex &) const = 0; - /** - * @brief Get tensor registry - * - * @return std::shared_ptr<backend::ITensorRegistry> tensor registry object - * - * @note Backend should implement this when it has StaticTensorManager and DynamicTensorManager - */ - virtual std::shared_ptr<backend::ITensorRegistry> tensorRegistry() = 0; - public: // methods for static tensor allocation /** * @brief Let the tensor builder know first use(start of lifetime) of a tensor * Must be called before calling @c prepare * Must be called only once for each tensor before calling @c allocate * * @param ind Operand index */ virtual void notifyFirstUse(const ir::OperandIndex &) = 0; /** * @brief Let the tensor builder know last use(end of lifetime) of a tensor * Must be called only once for each tensor before calling @c allocate * * @param ind Operand index */ virtual void notifyLastUse(const ir::OperandIndex &) = 0; /** * @brief Prepare the tensors * Before calling this, it must be ready to set how tensors would be used * */ virtual void prepare(void) = 0; /** * @brief Allocate the tensors * Before calling this, @c prepare must be called * */ virtual void allocate() = 0; /** * @brief Some actions after functions' @c IFunction::prepare method. * This is called right after each function's @c IFunction::prepare function has been * called. */ virtual void postFunctionPrepare() = 0; /** - * @brief Get the tensor object - * - * @param ind Index of the tensor - * @return std::shared_ptr<ITensor> The tensor object - */ - virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0; - - /** - * @brief Set the migrant tensor object - * - * @return true if succeeded - * @return false if failed or unsupported - */ - virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &) - { - return false; - } - - /** - * @brief Iterate over tensors - * - * @param fn The function to be run - */ - virtual void iterate(const IterateFunction &fn) = 0; - - /** * @brief Release static @c ITensorManager object which was built * Before calling this, @c allocate must have been called * * @return std::unique_ptr<ITensorManager> Tensor Manager object */ virtual std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) = 0; public: // methods for dynamic tensor allocation /** * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception * will be thrown. * * @return IDynamicTensorManager* DynamicTensorManager * @note Since it is a pointer, its lifetime is from the creation of TensorBuilder * to the end of execution */ - virtual IDynamicTensorManager *dynamicTensorManager(void) - { - throw std::runtime_error("dynamicTensorManager(): NYI"); - } + virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; } /** * @brief Release dynamic @c ITensorManager object which was built * Before calling this, @c allocate must have been called * * @return std::unique_ptr<ITensorManager> Tensor Manager object */ - virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) - { - throw std::runtime_error("releaseDynamicTensorManager() for this backend is not supported"); - } + virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; } }; } // namespace backend diff --git a/runtime/onert/core/include/backend/ITensorRegistry.h b/runtime/onert/core/include/backend/ITensorRegistry.h index 855513124..88fcb0fcd 100644 --- a/runtime/onert/core/include/backend/ITensorRegistry.h +++ b/runtime/onert/core/include/backend/ITensorRegistry.h @@ -21,6 +21,7 @@ #include "ir/Index.h" #include "backend/ITensor.h" +#include "backend/IPortableTensor.h" namespace onert { 
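Note the contract change above: dynamicTensorManager() and releaseDynamicTensorManager() now return nullptr for backends without dynamic-tensor support instead of throwing, so callers must null-check. A tiny runnable analog of the new convention (hypothetical types, not onert's):

#include <iostream>

struct DynamicTensorManager { /* ... */ };

struct TensorBuilder
{
  // New contract: absence of dynamic-tensor support is signaled by nullptr,
  // not by a runtime_error as before.
  virtual DynamicTensorManager *dynamicTensorManager() { return nullptr; }
  virtual ~TensorBuilder() = default;
};

int main()
{
  TensorBuilder builder;
  if (DynamicTensorManager *mgr = builder.dynamicTensorManager())
    std::cout << "dynamic tensors supported\n";
  else
    std::cout << "no dynamic tensor support\n"; // printed
}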
@@ -51,13 +52,22 @@ struct ITensorRegistry * @note Returned tensor cannot be used longer than dynamic tensor manager */ virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0; + /** + * @brief Set a migrant tensor, which comes from another backend + * + * @return true if supported + * @return false if not supported + */ + virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &) + { + return false; + } }; } // namespace backend } // namespace onert #include "ir/OperandIndexMap.h" -#include "backend/IPortableTensor.h" namespace onert { @@ -108,24 +118,23 @@ public: return nullptr; } - bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor) + bool setMigrantTensor(const ir::OperandIndex &ind, + const std::shared_ptr<IPortableTensor> &tensor) override { - // TODO Uncomment this as two tensors for an index is not allowed. - // But now it is temporarily allowed as a workaround. External one hides Managed one. - // auto itr = _native.find(ind); - // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr) - // throw std::runtime_error{ - // "Tried to set an migrant tensor but an native tensor already exists."}; + assert(tensor != nullptr); + auto itr = _native.find(ind); + if (itr != _native.end()) + throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."}; _migrant[ind] = tensor; return true; } void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor) { + assert(tensor != nullptr); auto itr = _migrant.find(ind); - if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr) - throw std::runtime_error{ - "Tried to set a native tensor but an migrant tensor already exists."}; + if (itr != _migrant.end()) + throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."}; _native[ind] = tensor; } diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h index a7e034a91..3f09b7a4a 100644 --- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h @@ -20,6 +20,7 @@ #include "MemoryManager.h" #include "backend/IStaticTensorManager.h" +#include "backend/IDynamicTensorManager.h" #include "ir/OperandIndexMap.h" #include "ir/OperandInfo.h" #include "TensorRegistry.h" @@ -34,7 +35,8 @@ namespace cpu_common class StaticTensorManager : public backend::IStaticTensorManager { public: - StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg); + StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, + IDynamicTensorManager *dynamic_tensor_manager); virtual ~StaticTensorManager() = default; void allocateConsts(void); @@ -55,6 +57,7 @@ private: std::unique_ptr<MemoryManager> _nonconst_mgr; const std::shared_ptr<TensorRegistry> _tensors; ir::OperandIndexMap<bool> _as_constants; + IDynamicTensorManager *_dynamic_tensor_manager; }; } // namespace cpu_common diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h new file mode 100644 index 000000000..aadba6857 --- /dev/null +++ b/runtime/onert/core/include/compiler/LoweredGraph.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
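PortableTensorRegistryTemplate now enforces the native/migrant exclusivity that was previously commented out as a workaround: registering one kind of tensor for an index that already holds the other throws. A reduced model of that invariant (tensors simplified to ints, not onert types):

#include <iostream>
#include <map>
#include <stdexcept>

// An index may hold either a "native" entry (owned by this backend) or a
// "migrant" one (shared from another backend), never both.
struct Registry
{
  std::map<int, int> native, migrant;

  void setNative(int ind, int t)
  {
    if (migrant.count(ind))
      throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
    native[ind] = t;
  }

  void setMigrant(int ind, int t)
  {
    if (native.count(ind))
      throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
    migrant[ind] = t;
  }
};

int main()
{
  Registry r;
  r.setNative(0, 42);
  try
  {
    r.setMigrant(0, 7); // violates the invariant
  }
  catch (const std::exception &e)
  {
    std::cout << e.what() << '\n';
  }
}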
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_IR_LOWERED_GRAPH_H__ +#define __ONERT_IR_LOWERED_GRAPH_H__ + +#include "ir/Graph.h" +#include "ir/LowerInfoMap.h" +#include "ir/OpSequences.h" +#include "compiler/BackendResolver.h" +#include "compiler/Compiler.h" + +namespace onert +{ +namespace compiler +{ + +/** + * @brief Class that contains lowering information on a graph. + * In addition, after lowering, operands in the graph are marked "dynamic" + * if the output shape of an operation cannot be decided at compilation time. + */ +class LoweredGraph +{ +public: + LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options); + + ir::Graph &graph() { return _graph; } + const ir::Graph &graph() const { return _graph; } + const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; } + const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const; + void setLowerInfo(const ir::OpSequenceIndex &op_seq_index, + std::unique_ptr<ir::operation::LowerInfo> &&lower_info); + void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index); + const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const; + ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index); + void setLowerInfo(const ir::OperandIndex &index, + std::unique_ptr<ir::operand::LowerInfo> &&lower_info); + void removeLowerInfo(const ir::OperandIndex &index); + ir::OpSequences &op_seqs() { return _op_seqs; } + const ir::OpSequences &op_seqs() const { return _op_seqs; } + void iterateTopolOpSeqs( + const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const; + void + iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn); + const backend::BackendContexts &backend_contexts() { return _backend_contexts; } + const backend::BackendContexts &backend_contexts() const { return _backend_contexts; } + std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; } + +private: + void + makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, + const compiler::CompilerOptions &options, + const compiler::BackendResolver &backend_resolver); + + void manipulateLowerInfo( + ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, + bool is_primary); + void dumpLowerInfo(); + bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index, + ir::Layout layout, const compiler::BackendResolver &backend_resolver); + ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index, + const ir::Operation &node); + +private: + ir::Graph _graph; + backend::BackendContexts _backend_contexts; + std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; + ir::LowerInfoMap _lower_info_map; + // A Pass (e.g. for Perm) can accept only a Graph, so Graph keeps OpSequences as a member + ir::OpSequences _op_seqs; +}; + +} // 
namespace compiler +} // namespace onert + +#endif // __ONERT_IR_LOWERED_GRAPH_H__ diff --git a/runtime/onert/core/include/compiler/StaticShapeInference.h b/runtime/onert/core/include/compiler/StaticShapeInference.h index bff68c9fa..b97cb5b7b 100644 --- a/runtime/onert/core/include/compiler/StaticShapeInference.h +++ b/runtime/onert/core/include/compiler/StaticShapeInference.h @@ -19,7 +19,7 @@ #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #include "ir/Index.h" #include <memory> @@ -41,7 +41,8 @@ class StaticShapeInferer : public ir::OperationVisitor public: StaticShapeInferer( const ir::SubgraphIndex &subg_idx, - const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &lowered_subgs) + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> + &lowered_subgs) : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()), _operations(lowered_subgs.at(subg_idx)->graph().operations()), _return_has_dynamic_tensor(false) @@ -57,54 +58,34 @@ public: * @param op_seq sequence of operations * @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise. */ - bool infer(const ir::OpSequence &op_seq) - { - bool has_dynamic_tensor = false; - - _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit() - - for (const auto &operation_idx : op_seq.operations()) - { - _operations.at(operation_idx).accept(*this); - - has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor; - } - - return has_dynamic_tensor; - } + bool infer(const ir::OpSequence &op_seq); void dump(); private: + bool checkDynamicInput(const ir::Operation &op); + void setDynamicOutput(const ir::Operation &op); + +private: // TODO Define visitors for operations. List them in alphabetic order. 
- void visit(const ir::operation::Abs &op) override; - void visit(const ir::operation::Add &op) override; void visit(const ir::operation::ArgMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; + void visit(const ir::operation::BinaryArithmetic &op) override; void visit(const ir::operation::BroadcastTo &op) override; - void visit(const ir::operation::Cast &op) override; void visit(const ir::operation::Comparison &op) override; void visit(const ir::operation::Concat &op) override; void visit(const ir::operation::Conv2D &op) override; - void visit(const ir::operation::Cos &op) override; - void visit(const ir::operation::Div &op) override; - void visit(const ir::operation::Exp &op) override; + void visit(const ir::operation::ElementwiseActivation &op) override; + void visit(const ir::operation::ElementwiseBinary &op) override; + void visit(const ir::operation::ElementwiseUnary &op) override; void visit(const ir::operation::ExpandDims &op) override; void visit(const ir::operation::Fill &op) override; void visit(const ir::operation::FullyConnected &op) override; void visit(const ir::operation::FusedBatchNorm &op) override; void visit(const ir::operation::Gather &op) override; void visit(const ir::operation::If &op) override; - void visit(const ir::operation::Log &op) override; - void visit(const ir::operation::LogicalNot &op) override; - void visit(const ir::operation::LogicalOr &op) override; - void visit(const ir::operation::Logistic &op) override; void visit(const ir::operation::L2Normalization &op) override; void visit(const ir::operation::MatrixBandPart &op) override; - void visit(const ir::operation::Max &op) override; - void visit(const ir::operation::Min &op) override; - void visit(const ir::operation::Mul &op) override; - void visit(const ir::operation::Neg &op) override; void visit(const ir::operation::OneHot &op) override; void visit(const ir::operation::Pack &op) override; void visit(const ir::operation::Pad &op) override; @@ -113,27 +94,21 @@ private: void visit(const ir::operation::Range &op) override; void visit(const ir::operation::Reduce &op) override; void visit(const ir::operation::Reshape &op) override; - void visit(const ir::operation::Round &op) override; - void visit(const ir::operation::RSQRT &op) override; void visit(const ir::operation::ResizeBilinear &op) override; void visit(const ir::operation::Reverse &op) override; void visit(const ir::operation::Select &op) override; void visit(const ir::operation::Shape &op) override; - void visit(const ir::operation::Sin &op) override; void visit(const ir::operation::Slice &op) override; void visit(const ir::operation::Softmax &op) override; void visit(const ir::operation::SpaceToBatchND &op) override; void visit(const ir::operation::Split &op) override; void visit(const ir::operation::Squeeze &op) override; void visit(const ir::operation::StridedSlice &op) override; - void visit(const ir::operation::Sub &op) override; void visit(const ir::operation::SquaredDifference &op) override; - void visit(const ir::operation::Tanh &op) override; void visit(const ir::operation::Tile &op) override; void visit(const ir::operation::Transpose &op) override; void visit(const ir::operation::Unpack &op) override; void visit(const ir::operation::While &op) override; - void visit(const ir::operation::ZerosLike &op) override; private: /** @@ -149,7 +124,8 @@ private: void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx); private: - const std::unordered_map<ir::SubgraphIndex, 
std::unique_ptr<ir::LoweredGraph>> &_lowered_subgs; + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> + &_lowered_subgs; // _operands and _operations can be changed by controlflow operation ir::Operands &_operands; // operands of current subgraph ir::Operations &_operations; // operations of current subgraph diff --git a/runtime/onert/core/include/exec/DynamicShapeInference.h b/runtime/onert/core/include/exec/DynamicShapeInference.h index bca80db09..6f6659659 100644 --- a/runtime/onert/core/include/exec/DynamicShapeInference.h +++ b/runtime/onert/core/include/exec/DynamicShapeInference.h @@ -38,46 +38,34 @@ namespace exec class DynamicShapeInferer : public ir::OperationVisitor { public: - DynamicShapeInferer(const ir::Operands &operands, backend::IDynamicTensorManager *tensor_manager, + DynamicShapeInferer(const ir::Operands &operands, const std::shared_ptr<backend::ITensorRegistry> &tensor_registry) - : _operands(operands), _dynamic_tensor_manager(tensor_manager), - _tensor_registry(tensor_registry) + : _operands(operands), _tensor_registry(tensor_registry) { UNUSED_RELEASE(_operands); - UNUSED_RELEASE(_dynamic_tensor_manager); UNUSED_RELEASE(_tensor_registry); } public: // TODO Define visitors for operations. List them in alphabetic order. // Remove TODO when any op starting from the alphabet is added - void visit(const ir::operation::Abs &op) override; - void visit(const ir::operation::Add &op) override; void visit(const ir::operation::ArgMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; + void visit(const ir::operation::BinaryArithmetic &op) override; void visit(const ir::operation::BroadcastTo &op) override; - void visit(const ir::operation::Cast &op) override; void visit(const ir::operation::Comparison &op) override; void visit(const ir::operation::Concat &op) override; void visit(const ir::operation::Conv2D &op) override; - void visit(const ir::operation::Cos &op) override; - void visit(const ir::operation::Div &op) override; - void visit(const ir::operation::Exp &op) override; + void visit(const ir::operation::ElementwiseActivation &op) override; + void visit(const ir::operation::ElementwiseBinary &op) override; + void visit(const ir::operation::ElementwiseUnary &op) override; void visit(const ir::operation::ExpandDims &op) override; void visit(const ir::operation::Fill &op) override; void visit(const ir::operation::FullyConnected &op) override; void visit(const ir::operation::FusedBatchNorm &op) override; void visit(const ir::operation::Gather &op) override; - void visit(const ir::operation::Log &op) override; - void visit(const ir::operation::LogicalNot &op) override; - void visit(const ir::operation::LogicalOr &op) override; - void visit(const ir::operation::Logistic &op) override; void visit(const ir::operation::L2Normalization &op) override; void visit(const ir::operation::MatrixBandPart &op) override; - void visit(const ir::operation::Max &op) override; - void visit(const ir::operation::Min &op) override; - void visit(const ir::operation::Mul &op) override; - void visit(const ir::operation::Neg &op) override; void visit(const ir::operation::OneHot &op) override; void visit(const ir::operation::Pack &op) override; void visit(const ir::operation::Pad &op) override; @@ -87,27 +75,21 @@ public: void visit(const ir::operation::Range &op) override; void visit(const ir::operation::Reduce &op) override; void visit(const ir::operation::Reshape &op) override; - void visit(const ir::operation::Round &op) override; - void 
visit(const ir::operation::RSQRT &op) override; void visit(const ir::operation::ResizeBilinear &op) override; void visit(const ir::operation::Reverse &op) override; void visit(const ir::operation::Select &op) override; void visit(const ir::operation::Shape &op) override; - void visit(const ir::operation::Sin &op) override; void visit(const ir::operation::Slice &op) override; void visit(const ir::operation::Softmax &op) override; void visit(const ir::operation::SpaceToBatchND &op) override; void visit(const ir::operation::Split &op) override; void visit(const ir::operation::Squeeze &op) override; void visit(const ir::operation::StridedSlice &op) override; - void visit(const ir::operation::Sub &op) override; void visit(const ir::operation::SquaredDifference &op) override; - void visit(const ir::operation::Tanh &op) override; void visit(const ir::operation::Tile &op) override; void visit(const ir::operation::Transpose &op) override; void visit(const ir::operation::Unpack &op) override; // TODO write op starting from V - void visit(const ir::operation::ZerosLike &op) override; private: /** @@ -127,11 +109,6 @@ private: */ const ir::Operands &_operands; /** - * @brief To allocate memory for output tensor if needed - */ - // TODO Remove this, as it is no longer used - backend::IDynamicTensorManager *_dynamic_tensor_manager; - /** * @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer() */ std::shared_ptr<backend::ITensorRegistry> _tensor_registry; diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index 46e05a289..6c8bab67c 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++ b/runtime/onert/core/include/exec/IExecutor.h @@ -80,8 +80,6 @@ struct DynAllocInfo { /// @brief index of input tensor whose memory needs to be allocated at execution time ir::OperandIndex ind; - /// @brief dynamic tensor manager that can allocate memory when input tensor is dynamic - backend::IDynamicTensorManager *dyn_tensor_manager; }; using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>; diff --git a/runtime/onert/core/include/exec/IODescription.h b/runtime/onert/core/include/exec/IODescription.h index c10c36756..d1810ec3b 100644 --- a/runtime/onert/core/include/exec/IODescription.h +++ b/runtime/onert/core/include/exec/IODescription.h @@ -62,8 +62,8 @@ struct IODescription { std::vector<std::unique_ptr<InputDesc>> inputs; std::vector<std::unique_ptr<OutputDesc>> outputs; - // Contains shape of input set by set_input_tensorinfo - std::unordered_map<ir::IOIndex, ir::Shape> input_shape_signature; + // Contains shape of input set by nnfw_set_input_tensorinfo(..) 
+ std::unordered_map<ir::IOIndex, ir::Shape> dynamic_input_shapes; }; } // namespace exec diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h index fb956fedf..2103e6e64 100644 --- a/runtime/onert/core/include/ir/Graph.h +++ b/runtime/onert/core/include/ir/Graph.h @@ -60,8 +60,8 @@ public: OperandIndex addOperand(const Shape &shape, const TypeInfo &type); OperationIndex addOperation(std::unique_ptr<Operation> &&node); void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data); - void addInput(const OperandIndex &ind); - void addOutput(const OperandIndex &ind); + void addInput(const OperandIndex &ind, const std::string &name = ""); + void addOutput(const OperandIndex &ind, const std::string &name = ""); void finishBuilding(void); void removeOperand(const OperandIndex &ind) { _operands.remove(ind); } bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; } @@ -94,6 +94,8 @@ public: OperandIndexSequence &getInputs() { return _inputs; } const OperandIndexSequence &getOutputs() const { return _outputs; } OperandIndexSequence &getOutputs() { return _outputs; } + IOIndex getInputIndex(const std::string &name) const; + IOIndex getOutputIndex(const std::string &name) const; const Operands &operands() const { return _operands; } Operands &operands() { return _operands; } // TODO Remove this non-const accessor const Operations &operations() const { return _operations; } @@ -108,6 +110,8 @@ private: Operands _operands; OperandIndexSequence _inputs; OperandIndexSequence _outputs; + std::unordered_map<std::string, IOIndex> _name_to_input; + std::unordered_map<std::string, IOIndex> _name_to_output; // Child subgraphs std::shared_ptr<Subgraphs> _subgraphs; // TFLite and circle's default layout is NHWC; diff --git a/runtime/onert/core/include/ir/InternalType.h b/runtime/onert/core/include/ir/InternalType.h index e42db72cf..1d962c185 100644 --- a/runtime/onert/core/include/ir/InternalType.h +++ b/runtime/onert/core/include/ir/InternalType.h @@ -40,6 +40,12 @@ struct Stride uint32_t horizontal; }; +struct Dilation +{ + uint32_t width_factor; + uint32_t height_factor; +}; + } // namespace ir } // namespace onert diff --git a/runtime/onert/core/include/ir/LoweredGraph.h b/runtime/onert/core/include/ir/LoweredGraph.h deleted file mode 100644 index d6583df24..000000000 --- a/runtime/onert/core/include/ir/LoweredGraph.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_LOWERED_GRAPH_H__ -#define __ONERT_IR_LOWERED_GRAPH_H__ - -#include "ir/Graph.h" -#include "ir/LowerInfoMap.h" -#include "ir/OpSequences.h" -#include "compiler/BackendResolver.h" -#include "compiler/Compiler.h" - -namespace onert -{ -namespace ir -{ - -/** - * @brief Class that contains lowering information on graph. 
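The _name_to_input/_name_to_output maps added to Graph above back these lookups. A hypothetical sketch of registering and resolving a named input (the operand, shape, type, and the name "input_ids" are invented):

    onert::ir::Graph graph;
    auto operand = graph.addOperand(shape, type); // shape/type built elsewhere
    graph.addInput(operand, "input_ids");         // name recorded for lookup
    onert::ir::IOIndex idx = graph.getInputIndex("input_ids");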
- * In addition, after lowering, operands in graph will be set to "dynamic" - * if the shape of output of an operation cannot be decided at compilation time. - */ -class LoweredGraph -{ -public: - LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options); - - Graph &graph() { return _graph; } - const Graph &graph() const { return _graph; } - const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; } - const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const; - void setLowerInfo(const OpSequenceIndex &op_seq_index, - std::unique_ptr<operation::LowerInfo> &&lower_info); - void removeLowerInfo(const OpSequenceIndex &op_seq_index); - const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const; - operand::LowerInfo *getLowerInfo(const OperandIndex &index); - void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info); - void removeLowerInfo(const OperandIndex &index); - OpSequences &op_seqs() { return _op_seqs; } - const OpSequences &op_seqs() const { return _op_seqs; } - void iterateTopolOpSeqs( - const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const; - void iterateTopolOpSeqs(const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn); - const backend::BackendContexts &backend_contexts() { return _backend_contexts; } - const backend::BackendContexts &backend_contexts() const { return _backend_contexts; } - std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; } - -private: - void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, - const compiler::CompilerOptions &options, - const compiler::BackendResolver &backend_resolver); - - void - manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, - bool is_primary); - void dumpLowerInfo(); - bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index, - Layout layout, const compiler::BackendResolver &backend_resolver); - OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index, - const Operation &node); - -private: - Graph _graph; - backend::BackendContexts _backend_contexts; - std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; - LowerInfoMap _lower_info_map; - // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member - OpSequences _op_seqs; -}; - -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_LOWERED_GRAPH_H__ diff --git a/runtime/onert/core/include/ir/OpSequences.h b/runtime/onert/core/include/ir/OpSequences.h index 6ed8499bc..ab258f395 100644 --- a/runtime/onert/core/include/ir/OpSequences.h +++ b/runtime/onert/core/include/ir/OpSequences.h @@ -63,13 +63,6 @@ public: */ OpSequenceIndex getOperation(const OperationIndex &operation_index) const; /** - * @brief Dump OpSequences - * - * @param msg Message that will be displayed - * @param graph Graph that has information used for dump - */ - void dump(const std::string &msg, const Operations &operations) const; - /** * @brief Remove an operation from OpSequence * * @param operation_index Operation index to be removed @@ -84,6 +77,14 @@ private: mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes; }; +/** + * @brief Dump OpSequences + * + * @param op_seqs Operation Sequences + * @param operations Operation context + */ +void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations); + } // namespace ir } // 
namespace onert diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 30c4ff25a..17bbbc29c 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -17,10 +17,10 @@ // This file has no ifdef guard intentionally #include "ir/operation/BatchToSpaceND.h" +#include "ir/operation/BinaryArithmetic.h" #include "ir/operation/BroadcastTo.h" #include "ir/operation/Conv2D.h" -#include "ir/operation/MaxPool2D.h" -#include "ir/operation/AvgPool2D.h" +#include "ir/operation/Pool2D.h" #include "ir/operation/Concat.h" #include "ir/operation/Reshape.h" #include "ir/operation/Fill.h" @@ -29,51 +29,32 @@ #include "ir/operation/Transpose.h" #include "ir/operation/Permute.h" #include "ir/operation/Reduce.h" -#include "ir/operation/Add.h" -#include "ir/operation/Sub.h" #include "ir/operation/DepthwiseConv2D.h" #include "ir/operation/Slice.h" #include "ir/operation/StridedSlice.h" -#include "ir/operation/Mul.h" #include "ir/operation/Squeeze.h" -#include "ir/operation/Tanh.h" -#include "ir/operation/Log.h" -#include "ir/operation/Logistic.h" -#include "ir/operation/Cast.h" -#include "ir/operation/Div.h" -#include "ir/operation/Exp.h" +#include "ir/operation/ElementwiseActivation.h" +#include "ir/operation/ElementwiseBinary.h" +#include "ir/operation/ElementwiseUnary.h" #include "ir/operation/ExpandDims.h" #include "ir/operation/Comparison.h" -#include "ir/operation/LogicalAnd.h" -#include "ir/operation/LogicalOr.h" -#include "ir/operation/LogicalNot.h" #include "ir/operation/LSTM.h" -#include "ir/operation/RSQRT.h" -#include "ir/operation/ReLU.h" #include "ir/operation/ResizeBilinear.h" -#include "ir/operation/ReLU1.h" -#include "ir/operation/ReLU6.h" +#include "ir/operation/ResizeNearestNeighbor.h" #include "ir/operation/Reverse.h" #include "ir/operation/RNN.h" -#include "ir/operation/Round.h" -#include "ir/operation/Floor.h" #include "ir/operation/SpaceToBatchND.h" #include "ir/operation/SpaceToDepth.h" -#include "ir/operation/L2Pool2D.h" #include "ir/operation/EmbeddingLookup.h" #include "ir/operation/L2Normalization.h" #include "ir/operation/HashtableLookup.h" #include "ir/operation/InstanceNorm.h" #include "ir/operation/PReLU.h" #include "ir/operation/TransposeConv.h" -#include "ir/operation/SQRT.h" #include "ir/operation/SquaredDifference.h" #include "ir/operation/TopKV2.h" #include "ir/operation/Gather.h" -#include "ir/operation/Neg.h" -#include "ir/operation/Abs.h" #include "ir/operation/ArgMax.h" -#include "ir/operation/Dequantize.h" #include "ir/operation/LocalResponseNormalization.h" #include "ir/operation/DepthToSpace.h" #include "ir/operation/Pack.h" @@ -82,27 +63,22 @@ #include "ir/operation/SplitV.h" #include "ir/operation/Unpack.h" #include "ir/operation/Pad.h" -#include "ir/operation/Min.h" -#include "ir/operation/Max.h" #include "ir/operation/Custom.h" #include "ir/operation/Einsum.h" #include "ir/operation/OneHot.h" -#include "ir/operation/Cos.h" -#include "ir/operation/Sin.h" #include "ir/operation/Shape.h" #include "ir/operation/ConvertFp32ToFp16.h" #include "ir/operation/ConvertFp16ToFp32.h" #include "ir/operation/If.h" #include "ir/operation/While.h" #include "ir/operation/Pow.h" -#include "ir/operation/ZerosLike.h" #include "ir/operation/Tile.h" #include "ir/operation/Range.h" +#include "ir/operation/Rank.h" #include "ir/operation/BCQFullyConnected.h" #include "ir/operation/BCQGather.h" #include "ir/operation/MatrixBandPart.h" #include 
"ir/operation/BatchMatMul.h" #include "ir/operation/FusedBatchNorm.h" #include "ir/operation/LogSoftmax.h" -#include "ir/operation/Quantize.h" #include "ir/operation/StatelessRandomUniform.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index 75c6d8221..ab2146821 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -19,62 +19,44 @@ #endif // Internal Name -OP(Add) -OP(Sub) OP(BatchToSpaceND) +OP(BinaryArithmetic) OP(BroadcastTo) -OP(Cast) OP(Conv2D) OP(DepthwiseConv2D) -OP(AvgPool2D) -OP(MaxPool2D) +OP(Pool2D) OP(Concat) OP(Fill) OP(FullyConnected) OP(Reduce) OP(Reshape) -OP(Mul) OP(Softmax) OP(Squeeze) OP(Slice) OP(StridedSlice) -OP(Tanh) -OP(Logistic) -OP(Div) OP(Transpose) -OP(Exp) +OP(ElementwiseActivation) +OP(ElementwiseBinary) +OP(ElementwiseUnary) OP(ExpandDims) OP(Comparison) -OP(LogicalAnd) -OP(LogicalOr) -OP(LogicalNot) OP(LSTM) -OP(RSQRT) -OP(ReLU) OP(ResizeBilinear) -OP(ReLU1) -OP(ReLU6) +OP(ResizeNearestNeighbor) OP(Reverse) OP(RNN) -OP(Round) -OP(Floor) OP(SpaceToBatchND) OP(SpaceToDepth) -OP(L2Pool2D) OP(EmbeddingLookup) OP(L2Normalization) OP(HashtableLookup) OP(InstanceNorm) OP(PReLU) OP(TransposeConv) -OP(SQRT) OP(SquaredDifference) OP(TopKV2) OP(Gather) -OP(Neg) -OP(Abs) OP(ArgMax) -OP(Dequantize) OP(Einsum) OP(LocalResponseNormalization) OP(DepthToSpace) @@ -86,26 +68,20 @@ OP(Unpack) OP(Pad) OP(Custom) OP(Permute) -OP(Min) -OP(Max) OP(OneHot) -OP(Cos) -OP(Sin) OP(Shape) OP(ConvertFp32ToFp16) OP(ConvertFp16ToFp32) OP(If) OP(While) -OP(Log) OP(Pow) -OP(ZerosLike) OP(Tile) OP(Range) +OP(Rank) OP(BCQFullyConnected) OP(BCQGather) OP(MatrixBandPart) OP(BatchMatMul) OP(FusedBatchNorm) OP(LogSoftmax) -OP(Quantize) OP(StatelessRandomUniform) diff --git a/runtime/onert/core/include/ir/Padding.h b/runtime/onert/core/include/ir/Padding.h index b9053914d..8a7bcdbeb 100644 --- a/runtime/onert/core/include/ir/Padding.h +++ b/runtime/onert/core/include/ir/Padding.h @@ -65,7 +65,8 @@ struct Padding // TODO Change to Padding struct's method const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, const Stride &stride, - uint32_t kw, uint32_t kh); + uint32_t kw, uint32_t kh, uint32_t dwf = 1, + uint32_t dhf = 1); } // namespace ir } // namespace onert diff --git a/runtime/onert/core/include/ir/operation/Abs.h b/runtime/onert/core/include/ir/operation/Abs.h deleted file mode 100644 index 9126c0027..000000000 --- a/runtime/onert/core/include/ir/operation/Abs.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_ABS_H__ -#define __ONERT_IR_OPERATION_ABS_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Abs : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Abs; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ABS_H__ diff --git a/runtime/onert/core/include/ir/operation/Add.h b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h index 5f5f4e0fe..110fff565 100644 --- a/runtime/onert/core/include/ir/operation/Add.h +++ b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_ADD_H__ -#define __ONERT_IR_OPERATION_ADD_H__ +#ifndef __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__ +#define __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__ #include "ir/Operation.h" #include "ir/InternalType.h" @@ -27,7 +27,7 @@ namespace ir namespace operation { -class Add : public Operation +class BinaryArithmetic final : public Operation { public: enum Input @@ -36,17 +36,28 @@ public: RHS }; + enum class ArithmeticType + { + ADD, + SUB, + MUL, + DIV + }; + struct Param { + ArithmeticType arithmetic_type; Activation activation; }; public: - Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Add; } + std::string name() const override; + OpCode opcode() const final { return OpCode::BinaryArithmetic; } public: const Param &param() const { return _param; } @@ -59,4 +70,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ADD_H__ +#endif // __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__ diff --git a/runtime/onert/core/include/ir/operation/BroadcastTo.h b/runtime/onert/core/include/ir/operation/BroadcastTo.h index 98906adc2..06c033497 100644 --- a/runtime/onert/core/include/ir/operation/BroadcastTo.h +++ b/runtime/onert/core/include/ir/operation/BroadcastTo.h @@ -42,7 +42,7 @@ public: public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Cast; } + OpCode opcode() const final { return OpCode::BroadcastTo; } }; } // namespace operation diff --git a/runtime/onert/core/include/ir/operation/Cast.h b/runtime/onert/core/include/ir/operation/Cast.h deleted file mode 100644 index 6fb8c105b..000000000 --- a/runtime/onert/core/include/ir/operation/Cast.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
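With the consolidation above, a former Add node is expressed through the new parameter block; a minimal sketch (the inputs/outputs OperandIndexSequence values and Activation::NONE are assumed from context):

    using onert::ir::operation::BinaryArithmetic;
    BinaryArithmetic::Param param;
    param.arithmetic_type = BinaryArithmetic::ArithmeticType::ADD; // old Add node
    param.activation = onert::ir::Activation::NONE;
    BinaryArithmetic node{inputs, outputs, param}; // inputs = {LHS, RHS}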
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_CAST_H__ -#define __ONERT_IR_OPERATION_CAST_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Cast : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Cast; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_CAST_H__ diff --git a/runtime/onert/core/include/ir/operation/Conv2D.h b/runtime/onert/core/include/ir/operation/Conv2D.h index e23bf3eb3..d8c7b671b 100644 --- a/runtime/onert/core/include/ir/operation/Conv2D.h +++ b/runtime/onert/core/include/ir/operation/Conv2D.h @@ -45,6 +45,7 @@ public: Stride stride; Padding padding; Activation activation; + Dilation dilation; }; public: diff --git a/runtime/onert/core/include/ir/operation/Cos.h b/runtime/onert/core/include/ir/operation/Cos.h deleted file mode 100644 index a6d7851bd..000000000 --- a/runtime/onert/core/include/ir/operation/Cos.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_COS_H__ -#define __ONERT_IR_OPERATION_COS_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Cos : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Cos; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_COS_H__ diff --git a/runtime/onert/core/include/ir/operation/Dequantize.h b/runtime/onert/core/include/ir/operation/Dequantize.h deleted file mode 100644 index 97a08b33c..000000000 --- a/runtime/onert/core/include/ir/operation/Dequantize.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
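The Dilation field added to Conv2D::Param above makes dilated convolution expressible in the IR; a sketch with invented values (a default dilation of {1, 1} elsewhere is an assumption):

    onert::ir::operation::Conv2D::Param p;
    p.dilation = onert::ir::Dilation{2, 2}; // width_factor, height_factor
    p.activation = onert::ir::Activation::NONE;
    // stride and padding are filled in exactly as before this change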
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__ -#define __ONERT_IR_OPERATION_DEQUANTIZE_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Dequantize : public Operation -{ -public: - enum Input - { - INPUT = 0, - }; - -public: - Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Dequantize; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__ diff --git a/runtime/onert/core/include/ir/operation/Einsum.h b/runtime/onert/core/include/ir/operation/Einsum.h index a3426ccbc..9892c24b8 100644 --- a/runtime/onert/core/include/ir/operation/Einsum.h +++ b/runtime/onert/core/include/ir/operation/Einsum.h @@ -41,7 +41,7 @@ public: public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Add; } + OpCode opcode() const final { return OpCode::Einsum; } public: const Param &param() const { return _param; } diff --git a/runtime/onert/core/include/ir/operation/Div.h b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h index a7ec1c465..b2a1d3d2d 100644 --- a/runtime/onert/core/include/ir/operation/Div.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,11 +14,10 @@ * limitations under the License.
*/ -#ifndef __ONERT_IR_OPERATION_DIV_H__ -#define __ONERT_IR_OPERATION_DIV_H__ +#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__ #include "ir/Operation.h" -#include "ir/InternalType.h" namespace onert { @@ -27,30 +26,46 @@ namespace ir namespace operation { -class Div : public Operation +class ElementwiseActivation : public Operation { public: enum Input { - LHS = 0, - RHS + INPUT = 0 + }; + + enum class Type + { + ELU, + LOGISTIC, + RELU, + TANH, + LEAKY_RELU }; struct Param { - Activation activation; + Type op_type; + float alpha; + float beta; + Param() : op_type(Type::ELU), alpha(0.0f), beta(0.0f) {} }; public: - Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Div; } + std::string name() const override; + OpCode opcode() const final { return OpCode::ElementwiseActivation; } public: const Param &param() const { return _param; } +public: + static float infinity; + private: Param _param; }; @@ -59,4 +74,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_DIV_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__ diff --git a/runtime/onert/core/include/ir/operation/Mul.h b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h index 0f01b0ecf..dd07f6058 100644 --- a/runtime/onert/core/include/ir/operation/Mul.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,11 +14,10 @@ * limitations under the License.
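A former Logistic (or Tanh, ReLU, ...) node maps onto the new Type enum above; a minimal sketch (the per-type meaning of alpha/beta is an assumption, as are inputs/outputs):

    using onert::ir::operation::ElementwiseActivation;
    ElementwiseActivation::Param p;
    p.op_type = ElementwiseActivation::Type::LOGISTIC; // old Logistic node
    ElementwiseActivation node{inputs, outputs, p};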
*/ -#ifndef __ONERT_IR_OPERATION_MUL_H__ -#define __ONERT_IR_OPERATION_MUL_H__ +#ifndef __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__ #include "ir/Operation.h" -#include "ir/InternalType.h" namespace onert { @@ -27,7 +26,7 @@ namespace ir namespace operation { -class Mul : public Operation +class ElementwiseBinary : public Operation { public: enum Input @@ -36,17 +35,27 @@ public: RHS }; + enum class ElementwiseBinaryType + { + LOGICAL_AND, + LOGICAL_OR, + MAX, + MIN + }; + struct Param { - Activation activation; + ElementwiseBinaryType op_type; }; public: - Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Mul; } + std::string name() const override; + OpCode opcode() const final { return OpCode::ElementwiseBinary; } public: const Param &param() const { return _param; } @@ -59,4 +68,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_MUL_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__ diff --git a/runtime/onert/core/include/ir/operation/MaxPool2D.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h index 300f7cb3c..c40778a56 100644 --- a/runtime/onert/core/include/ir/operation/MaxPool2D.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,10 @@ * limitations under the License.
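Likewise for the two-input logical/min/max ops; a sketch of the old LogicalAnd expressed as the consolidated node (inputs/outputs assumed built elsewhere):

    using onert::ir::operation::ElementwiseBinary;
    ElementwiseBinary::Param p;
    p.op_type = ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
    ElementwiseBinary node{inputs, outputs, p}; // inputs = {LHS, RHS}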
*/ -#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__ -#define __ONERT_IR_OPERATION_MAXPOOL2D_H__ - -#include <memory> +#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ #include "ir/Operation.h" -#include "ir/InternalType.h" -#include "ir/Padding.h" namespace onert { @@ -30,7 +26,7 @@ namespace ir namespace operation { -class MaxPool2D : public Operation +class ElementwiseUnary : public Operation { public: enum Input @@ -38,22 +34,40 @@ public: INPUT = 0 }; + enum class Type + { + ABS, + CAST, + COS, + DEQUANTIZE, + ERF, + EXP, + FLOOR, + LOG, + LOGICAL_NOT, + NEG, + QUANTIZE, + ROUND, + RSQRT, + SIN, + SQRT, + SQURE, + ZEROS_LIKE + }; + struct Param { - uint32_t kh; - uint32_t kw; - Stride stride; - Padding padding; - Activation activation; + Type op_type; }; public: - MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param); + ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::MaxPool2D; } + std::string name() const override; + OpCode opcode() const final { return OpCode::ElementwiseUnary; } public: const Param &param() const { return _param; } @@ -66,4 +80,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ diff --git a/runtime/onert/core/include/ir/operation/Exp.h b/runtime/onert/core/include/ir/operation/Exp.h deleted file mode 100644 index 2e68ff07a..000000000 --- a/runtime/onert/core/include/ir/operation/Exp.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_EXP_H__ -#define __ONERT_IR_OPERATION_EXP_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Exp : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Exp; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_EXP_H__ diff --git a/runtime/onert/core/include/ir/operation/Floor.h b/runtime/onert/core/include/ir/operation/Floor.h deleted file mode 100644 index b34699c22..000000000 --- a/runtime/onert/core/include/ir/operation/Floor.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
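Each deleted single-input op (Abs, Cast, Cos, Exp, Floor, ...) becomes a Type value of ElementwiseUnary above; a minimal sketch for the old Abs node (inputs/outputs assumed):

    using onert::ir::operation::ElementwiseUnary;
    ElementwiseUnary::Param p;
    p.op_type = ElementwiseUnary::Type::ABS;
    ElementwiseUnary node{inputs, outputs, p};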
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_FLOOR_H__ -#define __ONERT_IR_OPERATION_FLOOR_H__ - -#include <memory> - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Floor : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Floor; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_FLOOR_H__ diff --git a/runtime/onert/core/include/ir/operation/Log.h b/runtime/onert/core/include/ir/operation/Log.h deleted file mode 100644 index a6e3ca3f6..000000000 --- a/runtime/onert/core/include/ir/operation/Log.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_LOG_H__ -#define __ONERT_IR_OPERATION_LOG_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Log : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Log; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOG_H__ diff --git a/runtime/onert/core/include/ir/operation/LogicalAnd.h b/runtime/onert/core/include/ir/operation/LogicalAnd.h deleted file mode 100644 index dc853b6a9..000000000 --- a/runtime/onert/core/include/ir/operation/LogicalAnd.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__ -#define __ONERT_IR_OPERATION_LOGICAL_AND_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class LogicalAnd : public Operation -{ -public: - enum Input - { - INPUT0 = 0, - INPUT1 = 1, - }; - -public: - LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::LogicalAnd; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__ diff --git a/runtime/onert/core/include/ir/operation/LogicalNot.h b/runtime/onert/core/include/ir/operation/LogicalNot.h deleted file mode 100644 index 9519f6d47..000000000 --- a/runtime/onert/core/include/ir/operation/LogicalNot.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__ -#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class LogicalNot : public Operation -{ -public: - enum Input - { - INPUT = 0, - }; - -public: - LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::LogicalNot; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__ diff --git a/runtime/onert/core/include/ir/operation/LogicalOr.h b/runtime/onert/core/include/ir/operation/LogicalOr.h deleted file mode 100644 index c4b658cd9..000000000 --- a/runtime/onert/core/include/ir/operation/LogicalOr.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__ -#define __ONERT_IR_OPERATION_LOGICAL_OR_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class LogicalOr : public Operation -{ -public: - enum Input - { - INPUT0 = 0, - INPUT1 = 1, - }; - -public: - LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::LogicalOr; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__ diff --git a/runtime/onert/core/include/ir/operation/Logistic.h b/runtime/onert/core/include/ir/operation/Logistic.h deleted file mode 100644 index 5421e1c84..000000000 --- a/runtime/onert/core/include/ir/operation/Logistic.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__ -#define __ONERT_IR_OPERATION_LOGISTIC_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Logistic : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Logistic; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGISTIC_H__ diff --git a/runtime/onert/core/include/ir/operation/Max.h b/runtime/onert/core/include/ir/operation/Max.h deleted file mode 100644 index df72d3ae9..000000000 --- a/runtime/onert/core/include/ir/operation/Max.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_MAX_H__ -#define __ONERT_IR_OPERATION_MAX_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Max : public Operation -{ -public: - enum Input - { - LHS = 0, - RHS - }; - -public: - Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Max; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_MAX_H__ diff --git a/runtime/onert/core/include/ir/operation/Mean.h b/runtime/onert/core/include/ir/operation/Mean.h deleted file mode 100644 index ce2da908d..000000000 --- a/runtime/onert/core/include/ir/operation/Mean.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_MEAN_H__ -#define __ONERT_IR_OPERATION_MEAN_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Mean : public Operation -{ -public: - enum Input - { - INPUT, - AXES - }; - - struct Param - { - bool keep_dims; - }; - -public: - Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Mean; } - -public: - const Param &param() const { return _param; } - -private: - Param _param; -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_MEAN_H__ diff --git a/runtime/onert/core/include/ir/operation/Min.h b/runtime/onert/core/include/ir/operation/Min.h deleted file mode 100644 index 117301c00..000000000 --- a/runtime/onert/core/include/ir/operation/Min.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
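Mean itself does not vanish from models; it is presumably absorbed by the generic Reduce op kept in Operations.lst. A hedged sketch, assuming Reduce::Param carries a ReduceType and keep_dims:

    onert::ir::operation::Reduce::Param p;
    p.reduce_type = onert::ir::operation::Reduce::ReduceType::MEAN; // assumed enum value
    p.keep_dims = false;
    onert::ir::operation::Reduce node{inputs, outputs, p}; // inputs = {INPUT, AXES}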
- */ - -#ifndef __ONERT_IR_OPERATION_MIN_H__ -#define __ONERT_IR_OPERATION_MIN_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Min : public Operation -{ -public: - enum Input - { - LHS = 0, - RHS - }; - -public: - Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Min; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_MIN_H__ diff --git a/runtime/onert/core/include/ir/operation/Neg.h b/runtime/onert/core/include/ir/operation/Neg.h deleted file mode 100644 index f8123c485..000000000 --- a/runtime/onert/core/include/ir/operation/Neg.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_NEG_H__ -#define __ONERT_IR_OPERATION_NEG_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Neg : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Neg; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_NEG_H__ diff --git a/runtime/onert/core/include/ir/operation/AvgPool2D.h b/runtime/onert/core/include/ir/operation/Pool2D.h index d5b300a35..22425b4c2 100644 --- a/runtime/onert/core/include/ir/operation/AvgPool2D.h +++ b/runtime/onert/core/include/ir/operation/Pool2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__ -#define __ONERT_IR_OPERATION_AVGPOOL2D_H__ +#ifndef __ONERT_IR_OPERATION_POOL2D_H__ +#define __ONERT_IR_OPERATION_POOL2D_H__ #include <memory> @@ -30,7 +30,7 @@ namespace ir namespace operation { -class AvgPool2D : public Operation +class Pool2D : public Operation { public: enum Input @@ -38,23 +38,31 @@ public: INPUT = 0 }; + enum class PoolType + { + AVG, + L2, + MAX, + }; + struct Param { + PoolType op_type; uint32_t kh; uint32_t kw; - Stride stride; Padding padding; Activation activation; }; public: - AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param); + Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::AvgPool2D; } + std::string name() const override; + OpCode opcode() const final { return OpCode::Pool2D; } public: const Param &param() const { return _param; } @@ -67,4 +75,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__ +#endif // __ONERT_IR_OPERATION_POOL2D_H__ diff --git a/runtime/onert/core/include/ir/operation/Quantize.h b/runtime/onert/core/include/ir/operation/Quantize.h deleted file mode 100644 index 2533ce432..000000000 --- a/runtime/onert/core/include/ir/operation/Quantize.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__ -#define __ONERT_IR_OPERATION_QUANTIZE_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Quantize : public Operation -{ -public: - enum Input - { - INPUT = 0, - }; - -public: - Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Quantize; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_QUANTIZE_H__ diff --git a/runtime/onert/core/include/ir/operation/RSQRT.h b/runtime/onert/core/include/ir/operation/RSQRT.h deleted file mode 100644 index 64bb4f10a..000000000 --- a/runtime/onert/core/include/ir/operation/RSQRT.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
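The old AvgPool2D/MaxPool2D/L2Pool2D trio collapses into the Pool2D op above; a sketch of a 2x2 max pool (whether Param still carries a Stride field as the old headers did is an assumption here):

    using onert::ir::operation::Pool2D;
    Pool2D::Param p;
    p.op_type = Pool2D::PoolType::MAX; // old MaxPool2D
    p.kh = 2;
    p.kw = 2;
    p.activation = onert::ir::Activation::NONE;
    Pool2D node{inputs, outputs, p};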
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_RSQRT_H__ -#define __ONERT_IR_OPERATION_RSQRT_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class RSQRT : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::RSQRT; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_RSQRT_H__ diff --git a/runtime/onert/core/include/ir/operation/Round.h b/runtime/onert/core/include/ir/operation/Rank.h index 44af0d861..2fd24ce23 100644 --- a/runtime/onert/core/include/ir/operation/Round.h +++ b/runtime/onert/core/include/ir/operation/Rank.h @@ -14,8 +14,10 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_ROUND_H__ -#define __ONERT_IR_OPERATION_ROUND_H__ +#ifndef __ONERT_IR_OPERATION_RANK_H__ +#define __ONERT_IR_OPERATION_RANK_H__ + +#include <memory> #include "ir/Operation.h" @@ -26,7 +28,7 @@ namespace ir namespace operation { -class Round : public Operation +class Rank : public Operation { public: enum Input @@ -35,15 +37,15 @@ public: }; public: - Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Round; } + OpCode opcode() const final { return OpCode::Rank; } }; } // namespace operation } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ROUND_H__ +#endif // __ONERT_IR_OPERATION_RANK_H__ diff --git a/runtime/onert/core/include/ir/operation/ReLU.h b/runtime/onert/core/include/ir/operation/ReLU.h deleted file mode 100644 index 9eb0c091b..000000000 --- a/runtime/onert/core/include/ir/operation/ReLU.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_RELU_H__ -#define __ONERT_IR_OPERATION_RELU_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ReLU : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ReLU; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_RELU_H__ diff --git a/runtime/onert/core/include/ir/operation/ReLU1.h b/runtime/onert/core/include/ir/operation/ReLU1.h deleted file mode 100644 index 134ee573a..000000000 --- a/runtime/onert/core/include/ir/operation/ReLU1.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_ReLU1_H__ -#define __ONERT_IR_OPERATION_ReLU1_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ReLU1 : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ReLU1; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ReLU1_H__ diff --git a/runtime/onert/core/include/ir/operation/ReLU6.h b/runtime/onert/core/include/ir/operation/ReLU6.h deleted file mode 100644 index e658c4925..000000000 --- a/runtime/onert/core/include/ir/operation/ReLU6.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_ReLU6_H__ -#define __ONERT_IR_OPERATION_ReLU6_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ReLU6 : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ReLU6; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ReLU6_H__ diff --git a/runtime/onert/core/include/ir/operation/L2Pool2D.h b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h index d369fd5fc..e4d810eeb 100644 --- a/runtime/onert/core/include/ir/operation/L2Pool2D.h +++ b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,12 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__ -#define __ONERT_IR_OPERATION_L2_POOL_2D_H__ +#ifndef __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__ +#define __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__ #include <memory> #include "ir/Operation.h" -#include "ir/InternalType.h" -#include "ir/Padding.h" namespace onert { @@ -30,7 +28,7 @@ namespace ir namespace operation { -class L2Pool2D : public Operation +class ResizeNearestNeighbor : public Operation { public: enum Input @@ -40,20 +38,18 @@ public: struct Param { - Padding padding; - Stride stride; - uint32_t kw; - uint32_t kh; - Activation activation; + int32_t height_out; + int32_t width_out; + bool align_corners; }; public: - L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param); + ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::L2Pool2D; } + OpCode opcode() const final { return OpCode::ResizeNearestNeighbor; } public: const Param &param() const { return _param; } @@ -66,4 +62,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__ +#endif // __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__ diff --git a/runtime/onert/core/include/ir/operation/SQRT.h b/runtime/onert/core/include/ir/operation/SQRT.h deleted file mode 100644 index 8563b1ab1..000000000 --- a/runtime/onert/core/include/ir/operation/SQRT.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
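In the rename above, the resize operation's Param drops the pooling attributes in favor of a static output size plus a corner-alignment flag. For illustration, with placeholder operand indices and fields following the new struct:

  // Sketch only: a 224x224 nearest-neighbor resize node (example values).
  using namespace onert::ir;
  operation::ResizeNearestNeighbor::Param param;
  param.height_out = 224;      // output height
  param.width_out = 224;       // output width
  param.align_corners = false; // do not force corner pixels to coincide
  operation::ResizeNearestNeighbor resize{OperandIndexSequence{OperandIndex{0}},
                                          OperandIndexSequence{OperandIndex{1}}, param};
  assert(resize.opcode() == OpCode::ResizeNearestNeighbor);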
- */ - -#ifndef __ONERT_IR_OPERATION_SQRT_H__ -#define __ONERT_IR_OPERATION_SQRT_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class SQRT : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::SQRT; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_SQRT_H__ diff --git a/runtime/onert/core/include/ir/operation/Select.h b/runtime/onert/core/include/ir/operation/Select.h index 400ac9d3e..33bf67886 100644 --- a/runtime/onert/core/include/ir/operation/Select.h +++ b/runtime/onert/core/include/ir/operation/Select.h @@ -41,7 +41,7 @@ public: public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Abs; } + OpCode opcode() const final { return OpCode::Select; } }; } // namespace operation diff --git a/runtime/onert/core/include/ir/operation/Sin.h b/runtime/onert/core/include/ir/operation/Sin.h deleted file mode 100644 index aef44ab2e..000000000 --- a/runtime/onert/core/include/ir/operation/Sin.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_SIN_H__ -#define __ONERT_IR_OPERATION_SIN_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Sin : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Sin; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_SIN_H__ diff --git a/runtime/onert/core/include/ir/operation/Sub.h b/runtime/onert/core/include/ir/operation/Sub.h deleted file mode 100644 index 0674e6e4d..000000000 --- a/runtime/onert/core/include/ir/operation/Sub.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_SUB_H__ -#define __ONERT_IR_OPERATION_SUB_H__ - -#include "ir/Operation.h" -#include "ir/InternalType.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Sub : public Operation -{ -public: - enum Input - { - LHS = 0, - RHS - }; - - struct Param - { - Activation activation; - }; - -public: - Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Sub; } - -public: - const Param &param() const { return _param; } - -private: - Param _param; -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_SUB_H__ diff --git a/runtime/onert/core/include/ir/operation/Tanh.h b/runtime/onert/core/include/ir/operation/Tanh.h deleted file mode 100644 index 9b8d03bca..000000000 --- a/runtime/onert/core/include/ir/operation/Tanh.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_TANH_H__ -#define __ONERT_IR_OPERATION_TANH_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Tanh : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Tanh; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_TANH_H__ diff --git a/runtime/onert/core/include/ir/operation/ZerosLike.h b/runtime/onert/core/include/ir/operation/ZerosLike.h deleted file mode 100644 index 7c2851858..000000000 --- a/runtime/onert/core/include/ir/operation/ZerosLike.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __ONERT_IR_OPERATION_ZEROS_LIKE_H__ -#define __ONERT_IR_OPERATION_ZEROS_LIKE_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ZerosLike : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ZerosLike; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ZEROS_LIKE_H__ diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst index 1718e034c..5077fad69 100644 --- a/runtime/onert/core/include/util/Config.lst +++ b/runtime/onert/core/include/util/Config.lst @@ -20,7 +20,7 @@ // Name | Type | Default CONFIG(GRAPH_DOT_DUMP , int , "0") -CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon") +CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq CONFIG(OP_BACKEND_ALLOPS , std::string , "") CONFIG(OP_BACKEND_MAP , std::string , "") CONFIG(DISABLE_COMPILE , bool , "0") diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/core/include/util/Exceptions.h index cd27b0e40..fc3fa0f64 100644 --- a/runtime/onert/backend/cpu/ops/ExpLayer.h +++ b/runtime/onert/core/include/util/Exceptions.h @@ -14,44 +14,35 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ +#ifndef __ONERT_UTIL_ONERTEXCEPTION_H__ +#define __ONERT_UTIL_ONERTEXCEPTION_H__ -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> +#include <string> namespace onert { -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class ExpLayer : public ::onert::exec::IFunction +class OnertException : public std::exception { public: - ExpLayer(); - -public: - void expFloat32(); + OnertException(const std::string &msg) : _msg{msg} {} + OnertException(const std::string &tag, const std::string &msg) : _msg{tag + " : " + msg} {} - void expQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; + const char *what() const noexcept override { return _msg.c_str(); } private: - const IPortableTensor *_input; - IPortableTensor *_output; + std::string _msg; +}; + +class InsufficientBufferSizeException : public OnertException +{ +public: + InsufficientBufferSizeException(const std::string &msg) + : OnertException{"InsufficientBufferSize", msg} + { + } }; -} // namespace ops -} // namespace cpu -} // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ +#endif // __ONERT_UTIL_ONERTEXCEPTION_H__ diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h index a68c22b16..1ebed48f2 100644 --- a/runtime/onert/core/include/util/ShapeInference.h +++ b/runtime/onert/core/include/util/ShapeInference.h @@ -19,15 +19,13 @@ #include "Utils.h" -#include "ir/operation/AvgPool2D.h" #include "ir/operation/Concat.h" -#include "ir/operation/MaxPool2D.h" #include "ir/operation/Conv2D.h" #include "ir/operation/DepthwiseConv2D.h" +#include "ir/operation/Pool2D.h" #include "ir/operation/Reshape.h" -#include "ir/operation/RSQRT.h" #include "ir/operation/StridedSlice.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #include "ir/Index.h" #include "ir/Layout.h" #include "ir/OperationVisitor.h" @@ -46,8 +44,6 @@ 
using Shapes = std::vector<ir::Shape>; ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank); -ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param, - ir::Layout layout = ir::Layout::NHWC); ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape, const ir::operation::BatchMatMul::Param &param); @@ -74,15 +70,15 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis, int rank); -ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param, - ir::Layout layout = ir::Layout::NHWC); - ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis); ir::Shape inferPackShape(const ir::Shape &input_shape, int axis, int rank, int num); ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const size_t num_pads); +ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param, + ir::Layout layout = ir::Layout::NHWC); + template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val); ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_elements, diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h index 3c7325912..670f7750f 100644 --- a/runtime/onert/core/src/backend/controlflow/Backend.h +++ b/runtime/onert/core/src/backend/controlflow/Backend.h @@ -21,6 +21,7 @@ #include "ConstantInitializer.h" #include "KernelGenerator.h" #include "TensorBuilder.h" +#include "Tensor.h" #include <backend/Backend.h> @@ -63,10 +64,12 @@ public: // there is no such case until now, let's support it later // TODO Remove TensorBuilder and ConstantInitializer // TODO Support Consecutive controflow operation's intermediate tensor - auto tb = std::make_shared<TensorBuilder>(); + auto tr = std::make_shared<TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr); context->tensor_register = nullptr; context->optimizer = nullptr; return context; diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h index 35cc7835e..e21a8f357 100644 --- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h +++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ -#include "TensorBuilder.h" +#include "TensorRegistry.h" #include <backend/IConstantInitializer.h> #include <ir/Operands.h> @@ -33,16 +33,16 @@ class ConstantInitializer : public IConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{ } private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } private: - std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<ITensorRegistry> _tensor_reg; }; } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc index e538f3fd3..1288e4c96 100644 --- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc +++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc @@ -17,6 +17,8 @@ #include "DynamicTensorManager.h" #include "util/logging.h" +#include "util/Exceptions.h" +#include "ir/DataType.h" namespace onert { @@ -25,10 +27,8 @@ namespace backend namespace controlflow { -DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg, - const std::shared_ptr<UserTensorRegistry> &user_reg) - : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{reg}, - _user_tensors{user_reg} +DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors) + : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors} { // DO NOTHING } @@ -36,20 +36,20 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::Ten void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) { // NOTE Handle user tensors first - auto user_tensor = _user_tensors->getNativeTensor(ind); + auto user_tensor = _tensors->getNativeUserTensor(ind); if (user_tensor) { // User tensors cannot be reallocated. auto buffer_size = user_tensor->total_size(); auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type()); if (buffer_size < new_size) - throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"}; + throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"}; user_tensor->setShape(new_shape); return; } - // NOTE Then handle native tensors - auto tensor = _tensors->getNativeTensor(ind); + // NOTE Then handle own tensors + auto tensor = _tensors->getNativeOwnTensor(ind); assert(tensor); bool previously_dynamic = tensor->is_dynamic(); @@ -102,24 +102,13 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, ir::Layout backend_layout) { - assert(_tensors->getNativeTensor(ind) == nullptr); auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this); - _tensors->setNativeTensor(ind, tensor); + _tensors->setNativeOwnTensor(ind, tensor); } void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) { - auto find = _dealloc_tensor_map.find(op_ind); - if (find != _dealloc_tensor_map.end()) - { - auto &input_set = find->second; - input_set.emplace(operand_ind); - } - else - { - _dealloc_tensor_map.emplace( - std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind})); - } + _dealloc_tensor_map[op_ind].emplace(operand_ind); } void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind) diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h index 446427d64..dbe388ba2 100644 --- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h +++
b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h @@ -17,11 +17,11 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ -#include "UserTensorRegistry.h" +#include "TensorRegistry.h" +#include "Tensor.h" #include <backend/IDynamicTensorManager.h> #include <backend/cpu_common/MemoryManager.h> -#include <backend/cpu_common/TensorRegistry.h> #include <ir/OperandInfo.h> #include <ir/Operation.h> #include <ir/Index.h> @@ -33,16 +33,13 @@ namespace backend namespace controlflow { -// TODO Find optimized algorithm to manage memory. - /** * @brief Class to manage dynamic tensor and its memory */ class DynamicTensorManager : public backend::IDynamicTensorManager { public: - DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg, - const std::shared_ptr<UserTensorRegistry> &user_reg); + DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors); virtual ~DynamicTensorManager() = default; @@ -61,9 +58,7 @@ private: * @todo DynamicMemoryManager is not optimized. Optimized one is needed */ std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr; - // TODO Refactoring : Merge two TensorRegistries into one - const std::shared_ptr<cpu_common::TensorRegistry> _tensors; - const std::shared_ptr<UserTensorRegistry> _user_tensors; + const std::shared_ptr<TensorRegistry> _tensors; // contains list of dynamic tensor index, which can be deallocated after running operation // note: this map could contain static tensor index too. Careful use is required. diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc index eb83b7de4..de5a6a5f6 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc @@ -31,24 +31,24 @@ namespace backend namespace controlflow { -KernelGenerator::KernelGenerator(const ir::Graph &graph, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : _graph{graph}, _tensor_builder{tensor_builder}, _tensor_builder_set{}, _executor_map{nullptr} +KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + const std::shared_ptr<TensorRegistry> &tensor_reg) + : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg}, + _tensor_registries{}, _executor_map{nullptr} { UNUSED_RELEASE(_graph); - UNUSED_RELEASE(_tensor_builder_set); + UNUSED_RELEASE(_tensor_registries); UNUSED_RELEASE(_executor_map); } void KernelGenerator::visit(const ir::OpSequence &op_seq) { assert(!_return_fn_seq); - assert(_tensor_builder->dynamicTensorManager()); - assert(_tensor_builder->tensorRegistry()); + assert(_dyn_tensor_manager); + assert(_tensor_reg); - auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager(); - auto dyn_shape_inferer = std::make_unique<exec::DynamicShapeInferer>( - _graph.operands(), dyn_tensor_manager, _tensor_builder->tensorRegistry()); + auto dyn_shape_inferer = + std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg); _return_fn_seq = std::make_unique<exec::FunctionSequence>(); @@ -58,8 +58,8 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) dyn_ctx->op_seq = &op_seq; dyn_ctx->operations = &_graph.operations(); dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry(); - dyn_ctx->dynamic_tensor_manager =
_tensor_builder->dynamicTensorManager(); + dyn_ctx->tensor_registry = _tensor_reg; + dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager; _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } @@ -93,12 +93,7 @@ void KernelGenerator::visit(const ir::operation::If &node) auto output_tensor = getTensor(output_index); output_tensors.emplace_back(output_tensor); - const auto output_tensor_builder = getTensorBuilder(output_index); - if (output_tensor_builder->supportDynamicTensor()) - { - auto output_dyn_manager = output_tensor_builder->dynamicTensorManager(); - outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager}; - } + outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index}; } // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of @@ -121,14 +116,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)}; std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)}; std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info; - const auto output_tensor_builder = getTensorBuilder(output_index); - VERBOSE(PERMUTE_FIND_TB) << output_index << " -> " << output_tensor_builder.get() << std::endl; - assert(output_tensor_builder != nullptr); - if (output_tensor_builder->supportDynamicTensor()) - { - outputs_dyn_alloc_info[output_tensors.at(0)] = - exec::DynAllocInfo{output_index, output_tensor_builder->dynamicTensorManager()}; - } + outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index}; auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info); @@ -159,12 +147,7 @@ void KernelGenerator::visit(const ir::operation::While &node) output_tensors.emplace_back(output_tensor); - const auto output_tensor_builder = getTensorBuilder(output_index); - if (output_tensor_builder->supportDynamicTensor()) - { - auto output_dyn_manager = output_tensor_builder->dynamicTensorManager(); - outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager}; - } + outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index}; } // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of @@ -178,34 +161,7 @@ void KernelGenerator::visit(const ir::operation::While &node) std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index) { - std::shared_ptr<backend::ITensor> ret; - for (auto tensor_builder : _tensor_builder_set) - { - auto tensor = tensor_builder->tensorAt(index); - if (tensor) - { - ret = tensor; - break; - } - } - assert(ret != nullptr); - return ret; -} - -std::shared_ptr<backend::ITensorBuilder> -KernelGenerator::getTensorBuilder(const ir::OperandIndex &index) -{ - std::shared_ptr<backend::ITensorBuilder> ret; - for (auto tensor_builder : _tensor_builder_set) - { - auto reg = tensor_builder->tensorRegistry(); - auto tensor = reg ? 
reg->getNativeITensor(index) : tensor_builder->tensorAt(index); - if (tensor) - { - ret = tensor_builder; - break; - } - } + std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index); assert(ret != nullptr); return ret; } diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h index 1fc77935c..b84a810e4 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h @@ -22,9 +22,8 @@ #include <exec/IExecutor.h> #include <ir/Graph.h> #include "TensorBuilder.h" -#include "compiler/TensorBuilders.h" - -#include "compiler/TensorBuilders.h" +#include "compiler/TensorRegistries.h" +#include "TensorRegistry.h" namespace onert { @@ -36,11 +35,12 @@ namespace controlflow class KernelGenerator : public IKernelGenerator { public: - KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder); + KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + const std::shared_ptr<TensorRegistry> &tensor_reg); - void setTensorBuilderSet(const compiler::TensorBuilders &tensor_builder_set) + void setTensorRegistries(const compiler::TensorRegistries &tensor_registries) { - _tensor_builder_set = tensor_builder_set; + _tensor_registries = tensor_registries; } void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map) { @@ -57,12 +57,12 @@ public: private: std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index); - std::shared_ptr<backend::ITensorBuilder> getTensorBuilder(const ir::OperandIndex &index); private: const ir::Graph &_graph; - std::shared_ptr<TensorBuilder> _tensor_builder; - compiler::TensorBuilders _tensor_builder_set; + IDynamicTensorManager *_dyn_tensor_manager; + std::shared_ptr<TensorRegistry> _tensor_reg; + compiler::TensorRegistries _tensor_registries; exec::ExecutorMap *_executor_map; }; diff --git a/runtime/onert/core/src/ir/operation/Log.cc b/runtime/onert/core/src/backend/controlflow/Tensor.h index 85598bc87..ba5bafd75 100644 --- a/runtime/onert/core/src/ir/operation/Log.cc +++ b/runtime/onert/core/src/backend/controlflow/Tensor.h @@ -14,26 +14,22 @@ * limitations under the License. 
*/ -#include "ir/operation/Log.h" +#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ +#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ -#include <cassert> - -#include "ir/OperationVisitor.h" +#include <backend/cpu_common/Tensor.h> namespace onert { -namespace ir +namespace backend { -namespace operation +namespace controlflow { -void Log::accept(OperationVisitor &v) const { v.visit(*this); } - -Log::Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} +using Tensor = cpu_common::Tensor; -} // namespace operation -} // namespace ir +} // namespace controlflow +} // namespace backend } // namespace onert + +#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc index 5bddb9185..e5c3f5fd5 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc @@ -27,10 +27,10 @@ namespace backend namespace controlflow { -TensorBuilder::TensorBuilder() - : _tensor_reg{new cpu_common::TensorRegistry()}, _user_tensor_reg{new UserTensorRegistry()}, - _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)}, - _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg, _user_tensor_reg)} +TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{ + new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} { /* empty */ } @@ -54,10 +54,13 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) { - assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + // TODO Enhance the way of checking user tensors + if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors + return; + const auto tensor_info = _tensor_info_map.at(ind); - if (!at(ind)->is_dynamic()) + if (!nativeOwnTensorAt(ind)->is_dynamic()) { const auto size = tensor_info.total_size(); _static_tensor_mgr->claimPlan(ind, size); @@ -66,7 +69,11 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { - if (!at(ind)->is_dynamic()) + // TODO Enhance the way of checking user tensors + if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors + return; + + if (!nativeOwnTensorAt(ind)->is_dynamic()) { _static_tensor_mgr->releasePlan(ind); } @@ -74,6 +81,11 @@ void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const { + // User tensors are not registered in _tensor_info_map but objects for them are exist + // in the tensor registry. + // TODO Enhance the way of checking user tensors + if (_tensor_reg->getITensor(ind)) + return true; return _tensor_info_map.find(ind) != _tensor_info_map.end(); } @@ -89,25 +101,9 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. 
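// The notifyFirstUse/notifyLastUse guards above implement a claim/release
// liveness protocol on the static memory planner, now skipping user tensors.
// A sketch of the intended call order for one operand index `ind`; the calls
// are the ones shown in this file, the driver code around them is hypothetical:
//
//   tensor_builder->registerTensorInfo(ind, info, layout); // declare the tensor
//   tensor_builder->notifyFirstUse(ind);  // static tensor: claimPlan(ind, size)
//   /* ... operations that read or write `ind` run here ... */
//   tensor_builder->notifyLastUse(ind);   // static tensor: releasePlan(ind)
//   tensor_builder->prepare();            // plan memory over all claims
//   tensor_builder->allocate();           // back static tensors with memory
//
// Dynamic tensors bypass claim/release entirely (see the is_dynamic() checks).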
} -std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind) -{ - // NOTE Find from User Tensor Registry first - // FIXME There may be both user tensor and native tensor for a `ind` which is a waste - auto user_tensor = _user_tensor_reg->getITensor(ind); - auto tensor = _tensor_reg->getITensor(ind); - if (user_tensor) - { - return user_tensor; - } - else - return tensor; -} - -void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } - -std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind) +std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind) { - return _tensor_reg->getNativeTensor(ind); + return _tensor_reg->getNativeOwnTensor(ind); } std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) @@ -120,10 +116,10 @@ std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void) return std::move(_dynamic_tensor_mgr); } -void TensorBuilder::setUserTensor(const ir::OperandIndex &ind, - const std::shared_ptr<UserTensor> &tensor) +void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind, + const std::shared_ptr<UserTensor> &tensor) { - _user_tensor_reg->setNativeTensor(ind, tensor); + _tensor_reg->setNativeUserTensor(ind, tensor); } } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h index 9f2bb3754..2f2a2c47e 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h @@ -39,9 +39,7 @@ namespace controlflow class TensorBuilder : public ITensorBuilder { public: - TensorBuilder(); - - bool supportDynamicTensor() override { return true; } + TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg); /** * @brief Register tensor information to allocate on CPU backend @@ -61,15 +59,6 @@ public: void allocate() override; void postFunctionPrepare() override { /* DO NOTHING */} - /** - * @brief Get tensor with a specific OperandIndex - * - * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise. - */ - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } @@ -82,16 +71,13 @@ public: * If not, program will crash with assert or exception. 
* @return shared_ptr<operand::Tensor> */ - std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind); - void setUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor); - - std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } + std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind); + void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor); private: - const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; - const std::shared_ptr<UserTensorRegistry> _user_tensor_reg; - std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr; + const std::shared_ptr<TensorRegistry> _tensor_reg; std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr; ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; ir::OperandIndexMap<ir::Layout> _tensor_layout_map; }; diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h new file mode 100644 index 000000000..678c5b73b --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ + +#include "backend/cpu_common/TensorRegistry.h" +#include "backend/ITensorRegistry.h" +#include "Tensor.h" +#include "UserTensor.h" +#include <assert.h> + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +/** + * @brief Tensor registry class for controlflow backend + * + * This class contains three types of tensors. Two native tensors(tensors that are managed by this + * backend) and the other is migrant tensor. 
+ * + * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given + * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) + * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg ) + * + * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager + * + */ +class TensorRegistry : public ITensorRegistry +{ +public: + TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {} + + std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override + { + auto base_tensor = _base_reg->getITensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override + { + auto base_tensor = _base_reg->getNativeITensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind) + { + auto base_tensor = _base_reg->getPortableTensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind) + { + auto base_tensor = _base_reg->getNativeTensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind) + { + return _base_reg->getNativeTensor(ind); + } + + std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind) + { + auto tensor = _native_user_tensors.find(ind); + if (tensor != _native_user_tensors.end()) + return tensor->second; + return nullptr; + } + + bool setMigrantTensor(const ir::OperandIndex &ind, + const std::shared_ptr<IPortableTensor> &tensor) override + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _base_reg->setMigrantTensor(ind, tensor); + return true; + } + + void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor) + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _base_reg->setNativeTensor(ind, tensor); + } + + void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor) + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _native_user_tensors[ind] = tensor; + } + + const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors() + { + return _native_user_tensors; + } + std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; } + +private: + std::shared_ptr<cpu_common::TensorRegistry> _base_reg; + ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors; +}; + +} // namespace controlflow +} // namespace backend +} // namespace onert + +#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc index 3c095b38c..e8f1ea679 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc @@ -55,7 +55,11 @@ void PermuteLayer::run() try { const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind; - _dst_dyn_alloc_info_map.at(dst_tensor).dyn_tensor_manager->applyShape(dst_index, new_shape); + auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager(); + 
if (!dyn_tensor_manager) + throw std::runtime_error{ + "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"}; + dyn_tensor_manager->applyShape(dst_index, new_shape); assert(dst_tensor->buffer() != nullptr); } catch (const std::out_of_range &e) diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc index cb27d757f..f7ce3d011 100644 --- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc @@ -95,17 +95,7 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) { - auto find = _dealloc_tensor_map.find(op_ind); - if (find != _dealloc_tensor_map.end()) - { - auto &input_set = find->second; - input_set.emplace(operand_ind); - } - else - { - _dealloc_tensor_map.emplace( - std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind})); - } + _dealloc_tensor_map[op_ind].emplace(operand_ind); } void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind) diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc index 820cad38a..440f70c93 100644 --- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc @@ -26,8 +26,10 @@ namespace backend namespace cpu_common { -StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg) - : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg} +StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, + IDynamicTensorManager *dynamic_tensor_manager) + : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}, + _dynamic_tensor_manager{dynamic_tensor_manager} { // DO NOTHING } @@ -78,7 +80,7 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr); + auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager); _tensors->setNativeTensor(ind, tensor); _as_constants[ind] = as_const; } diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 33b428a4b..93dbbc3b5 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -134,6 +134,12 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) backend::controlflow::Config::ID; } + // FIXME This is a workaround for bcq operations, should remove it + { + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + } + { VERBOSE(Compiler) << std::boolalpha; VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl; @@ -181,14 +187,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level); // Lower: Assign backend - std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> lowered_subgs; + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
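The bcq workaround above relies on the manual scheduler's opcode-to-backend table: an operation whose opcode has an entry is pinned to the named backend instead of going through normal backend selection. A sketch of how such an entry would be consulted; resolveBackendId is a hypothetical helper rather than compiler API, and ManualSchedulerOptions is assumed to be the type of manual_scheduler_options holding the opcode_to_backend map used above:

  // Sketch only: opcode-based manual backend pinning with a fallback.
  std::string resolveBackendId(const onert::compiler::ManualSchedulerOptions &opts,
                               onert::ir::OpCode opcode, const std::string &fallback)
  {
    auto it = opts.opcode_to_backend.find(opcode);
    if (it != opts.opcode_to_backend.end())
      return it->second; // e.g. OpCode::BCQGather -> "bcq"
    return fallback;     // otherwise keep the normally chosen backend
  }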
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { _options.is_primary_subgraph = (index == ir::SubgraphIndex{0}); onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); // Lower: Assign backend - lowered_subgs[index] = std::make_unique<ir::LoweredGraph>(subg, _options); + lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options); // Check backend(s) for subgraph support FP16 bool backends_support_fp16 = true; diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index 82afd9e56..062c6c9c3 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -25,6 +25,7 @@ #include "compiler/ExecutionBuilder.h" #include "exec/ExecTime.h" #include "compiler/Linear.h" +#include "compiler/TensorBuilders.h" #include "backend/IConstantInitializer.h" #include "backend/IKernelGenerator.h" #include "backend/IOptimizer.h" @@ -64,6 +65,23 @@ private: std::shared_ptr<backend::IConfig> _config; }; +// TODO Think of a better way to manage TensorManagers +backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders) +{ + backend::TensorManagerSet tensor_mgrs; + for (auto &tensor_builder : tensor_builders) + { + auto s_tensor_manager = tensor_builder->releaseStaticTensorManager(); + if (s_tensor_manager != nullptr) + tensor_mgrs.insert(std::move(s_tensor_manager)); + + auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager(); + if (d_tensor_manager != nullptr) + tensor_mgrs.insert(std::move(d_tensor_manager)); + } + return tensor_mgrs; +} + } // namespace } // namespace onert @@ -87,14 +105,14 @@ ExecutorFactory::ExecutorFactory() std::placeholders::_3, true); } -exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph, +exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, const std::shared_ptr<exec::ExecutorMap> &executor_map) { return _map.at(options.executor)(std::move(lowered_graph), options, executor_map); } -void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph) +void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph) { struct Entry { @@ -132,7 +150,7 @@ void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph) } } -void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, +void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph, const std::vector<ir::OpSequenceIndex> &order) { for (const auto index : order) @@ -141,6 +159,8 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, const auto backend = lowered_graph->getLowerInfo(index)->backend(); const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register; auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; + auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs(); + if (tensor_register) { // Custom registration @@ -154,7 +174,7 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, const auto &op = lowered_graph->graph().operations().at(op_idx); for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs()) { - if (!tensor_builder->isRegistered(index)) + if 
(!tensor_builder->isRegistered(index) && !model_io.contains(index)) { const auto &operand_lower_info = lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement(); @@ -181,15 +201,28 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, } std::vector<std::shared_ptr<backend::ITensor>> -ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph, +ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, const ir::OperandIndexSequence &indices) { std::vector<std::shared_ptr<backend::ITensor>> ret; - TensorBuilders tensor_builders{lowered_graph.backend_contexts(), false}; - std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder = - tensor_builders.getControlflowTensorBuilder(); + // TODO Store controlflow backend in BackendContext + std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder; + std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; + for (const auto &e : lowered_graph.backend_contexts()) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::controlflow::Config::ID) + { + cf_tensor_builder = + std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder); + cf_tensor_reg = + std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); + } + } assert(cf_tensor_builder); + assert(cf_tensor_reg); for (auto ind : indices) { @@ -200,15 +233,16 @@ ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph, cf_tensor_builder->dynamicTensorManager()); // Add tensor to controlflow TensorRegistry. - cf_tensor_builder->setUserTensor(ind, tensor); + cf_tensor_reg->setNativeUserTensor(ind, tensor); ret.push_back(tensor); } return ret; } -void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph, - TensorBuilders &tensor_builders) +void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph) { + TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true}; + lowered_graph.op_seqs().iterate( [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) { auto lower_info = lowered_graph.getLowerInfo(op_seq_index); @@ -219,20 +253,20 @@ void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph, // If an OpSequence input/output tensor does not have a own tensor object, // it must be using external tensors, so find the tensor from other tensor builders and // set the tensor to this tensor builder if portable - if (!backend_ctx->tensor_builder->tensorAt(ind)) + if (!backend_ctx->tensor_registry->getITensor(ind)) { - auto tensor = tensor_builders.getITensor(ind); - assert(tensor); // The tensor must have been created in one of TensorBuilders + auto tensor = tensor_regs.getITensor(ind); + assert(tensor); // The tensor must have been registered auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor); if (ptensor) - backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor); + backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor); } } }); } exec::IExecutor * -ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, +ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, const std::shared_ptr<exec::ExecutorMap> &executor_map) { @@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_ 
 exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                       const compiler::CompilerOptions &options,
                                       const std::shared_ptr<exec::ExecutorMap> &executor_map)
 {
@@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
   Linear::planTensors(*lowered_graph, order);

   TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

   for (auto &tensor_builder : tensor_builders)
   {
     tensor_builder->prepare();
   }

-  prepareExternalTensors(*lowered_graph, tensor_builders);
+  prepareExternalTensors(*lowered_graph);

   ExecutionBuilder builder;
@@ -296,7 +331,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
     auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
     if (cf_kernel_gen != nullptr)
     {
-      cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+      cf_kernel_gen->setTensorRegistries(tensor_regs);
       cf_kernel_gen->setExecutorMap(executor_map);
     }
     auto fn_seq = kernel_gen->generate(op_seq);
@@ -335,9 +370,10 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
     });
   }

-  auto exec =
-      new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                               tensor_builders, std::move(code_map), order};
+  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+  auto exec = new exec::LinearExecutor{
+      std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+      std::move(tensor_mgrs),   std::move(code_map),           order};

   if (!options.trace_filepath.empty())
   {
@@ -350,7 +386,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
 }

 exec::IExecutor *ExecutorFactory::createDataflowExecutor(
-    std::unique_ptr<ir::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+    std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
     const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
 {
   const auto &backend_contexts = lowered_graph->backend_contexts();
@@ -369,6 +405,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   }

   TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

   // To make tensors never be deallocated, this is a workaround to use static memory planner
   for (auto &tensor_builder : tensor_builders)
@@ -387,7 +424,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     tensor_builder->prepare();
   }

-  prepareExternalTensors(*lowered_graph, tensor_builders);
+  prepareExternalTensors(*lowered_graph);

   ExecutionBuilder builder;
@@ -401,7 +438,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     if (cf_kernel_gen != nullptr)
     {
       assert(cf_kernel_gen != nullptr);
-      cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+      cf_kernel_gen->setTensorRegistries(tensor_regs);
       cf_kernel_gen->setExecutorMap(executor_map);
     }
     auto fn_seq = kernel_gen->generate(op_seq);
@@ -440,17 +477,20 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     });
   }

+  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+
   exec::ExecutorBase *exec = nullptr;
   if (parallel)
   {
-    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                                      tensor_builders, std::move(code_map)};
+    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
                                       output_tensors, tensor_regs,
+                                      std::move(tensor_mgrs), std::move(code_map)};
   }
   else
   {
-    auto dataflow_exec =
-        new exec::DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                                   tensor_builders, std::move(code_map)};
+    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
+                                                    output_tensors, tensor_regs,
+                                                    std::move(tensor_mgrs), std::move(code_map)};
     if (options.he_profiling_mode)
     {
       std::vector<const backend::Backend *> backends;
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 418e5a764..b8893c03b 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,8 +21,8 @@
 #include "backend/ITensor.h"
 #include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
-#include "TensorBuilders.h"
+#include "compiler/LoweredGraph.h"
+#include "TensorRegistries.h"

 namespace onert
 {
@@ -35,7 +35,7 @@ public:
   static ExecutorFactory &get();

 public:
-  exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                           const compiler::CompilerOptions &options,
                           const std::shared_ptr<exec::ExecutorMap> &executor_map);

@@ -43,28 +43,27 @@ private:
   ExecutorFactory();

 private:
-  static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
-  static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+  static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
+  static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
                                     const std::vector<ir::OpSequenceIndex> &order);
   static std::vector<std::shared_ptr<backend::ITensor>>
-  initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+  initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
                            const ir::OperandIndexSequence &indices);
-  static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
-                                     TensorBuilders &tensor_builders);
+  static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
   static exec::IExecutor *
-  createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                        const compiler::CompilerOptions &options,
                        const std::shared_ptr<exec::ExecutorMap> &executor_map);
   static exec::IExecutor *
-  createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                          const compiler::CompilerOptions &options,
                          const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);

 private:
-  std::unordered_map<
-      std::string, std::function<exec::IExecutor *(
-                       std::unique_ptr<ir::LoweredGraph>, const compiler::CompilerOptions &options,
-                       const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+  std::unordered_map<std::string, std::function<exec::IExecutor *(
+                                      std::unique_ptr<compiler::LoweredGraph>,
+                                      const compiler::CompilerOptions &options,
+                                      const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
       _map;
 };
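`_map` keys executor-factory callbacks by the executor name carried in CompilerOptions, which is how `create()` dispatches in the .cc hunk above. A sketch of how the constructor plausibly populates it, inferred from the `std::placeholders::_3, true` fragment visible earlier; the string keys other than "Linear" and the exact bound names are assumptions:

    // Hypothetical registration sketch; `parallel` is the trailing bool
    // argument of createDataflowExecutor.
    _map["Linear"] = createLinearExecutor;
    _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1,
                                 std::placeholders::_2, std::placeholders::_3, false);
    _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1,
                                 std::placeholders::_2, std::placeholders::_3, true);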
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c4b84ec0..23a6a253d 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -44,7 +44,7 @@ namespace onert
 namespace compiler
 {

-Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
+Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
     : _lowered_graph{lowered_graph}
 {
   VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
index 5dbf74472..eeecb9846 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
 #define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__

-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"

 namespace onert
 {
@@ -28,7 +28,7 @@ namespace compiler
 class Fp32ToFp16Converter
 {
 public:
-  Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph);
+  Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph);

 public:
   void run();
@@ -89,7 +89,7 @@ private:
   void convertOperandsOfOpSequence(ir::OpSequence &op_seq);

 private:
-  ir::LoweredGraph &_lowered_graph;
+  compiler::LoweredGraph &_lowered_graph;
   OpSeqIndexList _list_fp32_to_fp16;
   OpSeqIndexList _list_fp16_to_fp32;
 };
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index de9b4fbd0..5653b090e 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -54,42 +54,10 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
   return false;
 }

-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
-                             const ir::Operation &node, bool quant)
+static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
+                             bool)
 {
-  /* TODO: this is workaround, come up with better solution if have.
-           Adding exception in stage doesn't help. Because if there is a record for add without
-           broadcast, scheduling will select it since it doesn't distinguish broadcast and
-           non-broadcast like it does for quant non-quantized*/
-  if (backend->config()->id() == "cpu" &&
-      (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
-       node.opcode() == ir::OpCode::Mul))
-  {
-    const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-    const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-    /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
-     * without broadcasting*/
-    if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
-    {
-      return true;
-    }
-  }
-  /* TODO: this is workaround, come up with better solution if have.
-           Adding exception in stage doesn't help. Because if there is a record for Mul without
-           broadcast, scheduling will select it since it doesn't distinguish broadcast and
-           non-broadcast like it does for quant non-quantized*/
-  else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
-  {
-    const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-    const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-    // Nontrivial broadcasting isn't supported yet
-    if (quant ||
-        !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
-    {
-      return true;
-    }
-  }
+  // Now there is no workaround
   return false;
 }
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index d8ceca9c8..b9cee5881 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -59,6 +59,8 @@ public:
   {
     for (auto &entry : backend_contexts)
     {
+      if (entry.first->config()->id() == backend::controlflow::Config::ID)
+        continue;
       _all_backends.push_back(entry.first);
     }
     _backend_resolver = std::make_unique<compiler::BackendResolver>();
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 493ca1e43..49a989500 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -29,7 +29,7 @@ namespace onert
 namespace compiler
 {

-std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
 {
   std::vector<ir::OpSequenceIndex> order;
   lowered_graph.iterateTopolOpSeqs(
@@ -39,7 +39,7 @@ std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lower
   return order;
 }

-void Linear::dump(const ir::LoweredGraph &lowered_graph,
+void Linear::dump(const compiler::LoweredGraph &lowered_graph,
                   const std::vector<ir::OpSequenceIndex> &order)
 {
   {
@@ -62,7 +62,7 @@ void Linear::dump(const ir::LoweredGraph &lowered_graph,
   }
 }

-void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
                          const std::vector<ir::OpSequenceIndex> &order)
 {
   const auto &graph = lowered_graph.graph();
@@ -180,11 +180,9 @@ void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
         tensor_builder_map[ind]->notifyLastUse(ind);

         // plan for deallocation of dynamic tensor
-        if (tensor_builder_map[ind]->supportDynamicTensor())
-        {
-          assert(tensor_builder_map[ind]->dynamicTensorManager());
-          tensor_builder_map[ind]->dynamicTensorManager()->planDealloc(op_idx, ind);
-        }
+        auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
+        if (dyn_tensor_manager)
+          dyn_tensor_manager->planDealloc(op_idx, ind);
       }
     }
   }
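The deallocation planning above now leans on a single nullable accessor instead of a separate capability flag; assuming builders without dynamic-tensor support return a null manager, the check collapses to the usual idiom:

    // Sketch of the idiom used in the hunk above.
    if (auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager())
      dyn_tensor_manager->planDealloc(op_idx, ind); // silently skipped when unsupported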
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index faeff77f3..1e24cf92b 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -23,7 +23,7 @@
 #include "ir/OpSequences.h"
 #include "ir/Index.h"
 #include "backend/ITensorBuilder.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"

 namespace onert
 {
@@ -41,10 +41,10 @@ namespace compiler
 class Linear
 {
 public:
-  static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
-  static void dump(const ir::LoweredGraph &lowered_graph,
+  static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+  static void dump(const compiler::LoweredGraph &lowered_graph,
                    const std::vector<ir::OpSequenceIndex> &order);
-  static void planTensors(const ir::LoweredGraph &lowered_graph,
+  static void planTensors(const compiler::LoweredGraph &lowered_graph,
                           const std::vector<ir::OpSequenceIndex> &order);
 };
diff --git a/runtime/onert/core/src/ir/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 8aedfbdf0..1489a1884 100644
--- a/runtime/onert/core/src/ir/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -14,18 +14,18 @@
  * limitations under the License.
  */

-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"

 #include <assert.h>
 #include <sstream>
 #include "util/logging.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/ConstantLoweringPass.h"
-#include "pass/PermutationOperationPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
+#include "compiler/pass/ConstantInsertionPass.h"
+#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PermutationOperationPass.h"
+#include "compiler/pass/PermutationInsertionPass.h"
+#include "compiler/pass/PermutationEliminationPass.h"
 #include "ir/GraphIterator.h"
-#include "verifier/Verifier.h"
+#include "ir/verifier/Verifier.h"
 #include "backend/Backend.h"
 #include "backend/IConfig.h"
 #include "compiler/BackendResolver.h"
@@ -34,16 +34,15 @@
 namespace onert
 {
-namespace ir
+namespace compiler
 {

-LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
-    : _graph{graph}
+LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
 {
   bool linear_executor = (options.executor == "Linear");

   // Build backend contexts
-  auto &backend_manager = compiler::BackendManager::get();
+  auto &backend_manager = BackendManager::get();

   // Always create Controlflow backend context
   auto cf_backend = backend_manager.getControlflow();
@@ -73,36 +72,37 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
   // TODO Move "schedule" phase out of here
   // Schedule
-  std::unique_ptr<compiler::BackendResolver> backend_resolver;
+  std::unique_ptr<BackendResolver> backend_resolver;
   if (options.he_scheduler)
   {
-    auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+    auto scheduler = HEScheduler(_backend_contexts, options);
     backend_resolver = scheduler.schedule(_graph);
     _indexed_ranks = scheduler.getIndexedRanks();
   }
   else
   {
-    auto scheduler = compiler::ManualScheduler(_backend_contexts, options);
+    auto scheduler = ManualScheduler(_backend_contexts, options);
     backend_resolver = scheduler.schedule(_graph);
   }

   {
     // operand::LowerInfo holder
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;

-    _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
-      operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+    _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+      operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
     });

     // Make op_seqs while checking whether a node can be merged into an op_seq.
     makeOpSequences(operands_lower_info, options, *backend_resolver);

-    _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+    _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
       assert(op_seq.operations().size() > 0);
       std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
     });

-    _op_seqs.dump("merged and sorted operations without permutation", _graph.operations());
+    VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+    dumpOpSequences(_op_seqs, _graph.operations());

     pass::ConstantInsertionPass ci_pass(*this);
     ci_pass.run();
@@ -127,17 +127,19 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
     pass::PermutationEliminationPass pe_pass(*this);
     pe_pass.run();

-    _op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
+    VERBOSE(OpSequences) << "dump with permutation" << std::endl;
+    dumpOpSequences(_op_seqs, _graph.operations());
   }

   // Graph verifications
   {
-    assert(verifier::DAGChecker().verify(_graph));
-    assert(verifier::EdgeConsistencyChecker().verify(_graph));
+    assert(ir::verifier::DAGChecker().verify(_graph));
+    assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
   }
 }

-const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+const ir::operation::LowerInfo *
+LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
 {
   auto itr = _lower_info_map.op_seq.find(op_seq_index);
   if (itr == _lower_info_map.op_seq.end())
@@ -145,13 +147,13 @@
   return itr->second.get();
 }

-void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
-                                std::unique_ptr<operation::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+                                std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
 {
   _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
 }

-void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
 {
   auto &op_seq_lower_info = _lower_info_map.op_seq;
   assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
@@ -165,7 +167,7 @@
   }
 }

-const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
 {
   auto itr = _lower_info_map.operand.find(index);
   if (itr == _lower_info_map.operand.end())
@@ -173,7 +175,7 @@
   return itr->second.get();
 }

-operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
 {
   auto itr = _lower_info_map.operand.find(index);
   if (itr == _lower_info_map.operand.end())
@@ -181,25 +183,26 @@
   return itr->second.get();
 }

-void LoweredGraph::setLowerInfo(const OperandIndex &index,
-                                std::unique_ptr<operand::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
+                                std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
 {
   _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
 }

-void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
 {
   _lower_info_map.operand.erase(index);
 }

 void LoweredGraph::iterateTopolOpSeqs(
-    const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const
+    const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
 {
-  // Topological Sorting for OpSequences
-  std::vector<OpSequenceIndex> topol_sorted;
-  PostDfsIterator<true>{}.iterateOpSeqs(
-      *this,
-      [&](const OpSequenceIndex &index, const OpSequence &) { topol_sorted.emplace_back(index); });
+  // Topological Sorting for ir::OpSequences
+  std::vector<ir::OpSequenceIndex> topol_sorted;
+  ir::PostDfsIterator<true>{}.iterateOpSeqs(
+      *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
+        topol_sorted.emplace_back(index);
+      });
   std::reverse(topol_sorted.begin(), topol_sorted.end());
   for (const auto op_seq_idx : topol_sorted)
   {
@@ -209,12 +212,14 @@ void LoweredGraph::iterateTopolOpSeqs(
 }

 void LoweredGraph::iterateTopolOpSeqs(
-    const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn)
+    const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
 {
-  // Topological Sorting for OpSequences
-  std::vector<OpSequenceIndex> topol_sorted;
-  PostDfsIterator<false>{}.iterateOpSeqs(
-      *this, [&](const OpSequenceIndex &index, OpSequence &) { topol_sorted.emplace_back(index); });
+  // Topological Sorting for ir::OpSequences
+  std::vector<ir::OpSequenceIndex> topol_sorted;
+  ir::PostDfsIterator<false>{}.iterateOpSeqs(
+      *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
+        topol_sorted.emplace_back(index);
+      });
   std::reverse(topol_sorted.begin(), topol_sorted.end());
   for (const auto op_seq_idx : topol_sorted)
   {
@@ -223,12 +228,12 @@ void LoweredGraph::iterateTopolOpSeqs(
   }
 }

-OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
-                                                          const Operation &node)
+ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+                                                              const ir::Operation &node)
 {
   // Create a fresh op_seq with one operation, and append it to op_seqs
   // Create a fresh op_seq
-  auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+  auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());

   // Add an operation
   op_seq->appendOperation(node_index);
@@ -241,21 +246,21 @@
 }

 void LoweredGraph::makeOpSequences(
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
-    const compiler::CompilerOptions &options, const compiler::BackendResolver &backend_resolver)
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+    const CompilerOptions &options, const BackendResolver &backend_resolver)
 {
   // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
   const int op_seq_max_node = options.op_seq_max_node;
   assert(op_seq_max_node >= 0);

   bool is_profiling = options.he_profiling_mode;
-  OpSequence *op_seq = nullptr;
-  OpSequenceIndex op_seq_index;
+  ir::OpSequence *op_seq = nullptr;
+  ir::OpSequenceIndex op_seq_index;

   // NOTE: The below method appends nodes while making one op_seq if needed. If there are better
   //       ways, happy to update this code.
-  PostDfsConstIterator{}.iterate(
-      _graph, [&](const OperationIndex &node_index, const Operation &node) {
+  ir::PostDfsConstIterator{}.iterate(
+      _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
        // LowerInfo for in/output operands
        auto backend = backend_resolver.getBackend(node_index);
@@ -269,12 +274,12 @@ void LoweredGraph::makeOpSequences(
        for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
        {
          auto &&lower_info = operands_lower_info.at(operand);
-         lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+         lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
        }
        for (auto operand : node.getOutputs())
        {
          auto &&lower_info = operands_lower_info.at(operand);
-         lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+         lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
        }

        bool new_op_seq = (op_seq == nullptr ||
@@ -288,9 +293,9 @@ void LoweredGraph::makeOpSequences(
        {
          auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);

-         // OpSequence LowerInfo
+         // ir::OpSequence LowerInfo
          setLowerInfo(new_op_seq_index,
-                      std::make_unique<operation::LowerInfo>(backend, backend_layout));
+                      std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));

          op_seq_index = new_op_seq_index;
          op_seq = &(_op_seqs.at(new_op_seq_index));
@@ -318,16 +323,17 @@
 }

 void LoweredGraph::manipulateLowerInfo(
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, bool is_primary)
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+    bool is_primary)
 {
-  const auto controlflow_backend = compiler::BackendManager::get().getControlflow();
+  const auto controlflow_backend = BackendManager::get().getControlflow();
   // TODO Rather than handling primary graph specially,
   //      let the permute inserted and remove it later
   if (is_primary)
   {
     // TODO Rather than using NHWC Get frontend layout of this node from IR
-    auto factor = operand::PermuteFactor{controlflow_backend, Layout::NHWC};
+    auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
     for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
     {
       auto &&lower_info = operands_lower_info.at(index);
@@ -355,9 +361,9 @@ void LoweredGraph::manipulateLowerInfo(
     else
     {
       // In case of that an operand is Graph's input and not input or output of any operation
-      lower_info->addDefPermuteFactor(operand::PermuteFactor{
+      lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
          controlflow_backend,
-         Layout::NHWC // TODO Get frontend layout of this node from IR
+         ir::Layout::NHWC // TODO Get frontend layout of this node from IR
       });
     }
   }
@@ -368,15 +374,15 @@ void LoweredGraph::manipulateLowerInfo(
     if (lower_info->def_factors().size() == 0)
     {
       // In case of that an operand is Graph's output and not input or output of any operation
-      lower_info->addDefPermuteFactor(operand::PermuteFactor{
+      lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
          controlflow_backend,
-         Layout::NHWC // TODO Get frontend layout of this node from IR
+         ir::Layout::NHWC // TODO Get frontend layout of this node from IR
       });
     }
   }

   // Set LowerInfo for each operand from the operand::LowerInfo holder
-  _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+  _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
     setLowerInfo(index, std::move(operands_lower_info[index]));
   });
 }
@@ -388,11 +394,11 @@ void LoweredGraph::dumpLowerInfo()

   std::map<uint32_t, std::string> dumps;

-  _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+  _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
     std::stringstream sstream;
     if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
     {
-      auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+      auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
         std::string str;
         for (auto factor : factors)
         {
@@ -403,7 +409,7 @@ void LoweredGraph::dumpLowerInfo()
         return "{ " + str + "}";
       };

-      auto operation_index_to_string = [](const OperationIndexSet &operations) {
+      auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
         std::string str;
         for (auto op : operations)
         {
@@ -427,8 +433,8 @@ void LoweredGraph::dumpLowerInfo()
         sstream << (shape.dim(i)) << " ";
       }
       sstream << "}" << std::endl;
-      sstream << "  - Def Operations  : " << def_ops << std::endl;
-      sstream << "  - Use Operations  : " << use_ops << std::endl;
+      sstream << "  - Def ir::Operations  : " << def_ops << std::endl;
+      sstream << "  - Use ir::Operations  : " << use_ops << std::endl;
       sstream << "  - Lower Info" << std::endl;
      sstream << "    - Def Backends    : " << def_layouts << std::endl;
      sstream << "    - Use Backends    : " << use_layouts << std::endl;
@@ -445,8 +451,9 @@ void LoweredGraph::dumpLowerInfo()
   }
 }

-bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
-                             Layout layout, const compiler::BackendResolver &backend_resolver)
+bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
+                             const ir::OperationIndex &node_index, ir::Layout layout,
+                             const BackendResolver &backend_resolver)
 {
   // Are they mergeable?
   // 1. the same backend id and layout?
@@ -470,10 +477,10 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
   // Branched?
   {
-    std::unordered_set<OperationIndex> branched_set;
+    std::unordered_set<ir::OperationIndex> branched_set;

     // Check for branching up
-    for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+    for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
     {
       const auto &input_obj = _graph.operands().at(input);
       auto def = input_obj.getDef();
@@ -489,7 +496,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
     branched_set.clear();

     // Check for branching down
-    for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+    for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
     {
       // TODO Fix this workaround for the case of model outputs that are used by another operation
       //      This is needed since the branching is decided by operation, but for model outputs,
@@ -516,7 +523,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
     const auto &node_outputs = node.getOutputs();

     // op_seq's operations are in order so that we just check the first and the last
-    std::vector<OperationIndex> op_seq_ops{op_seq.operations()[0]};
+    std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
     if (op_seq.operations().size() > 1)
       op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
@@ -556,5 +563,5 @@
   return false;
 }

-} // namespace ir
+} // namespace compiler
 } // namespace onert
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index 1d591ae3c..ed49ee56f 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -40,7 +40,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
   const auto &manual_options = _options.manual_scheduler_options;
   auto backend_resolver = std::make_unique<compiler::BackendResolver>();

-  // This fallback will be used for unavailable backends
+  // This fallback will be used when `backend_for_all` is unavailable
   auto fallback = [&]() -> const backend::Backend * {
     for (auto backend_id : _options.backend_list)
     {
@@ -50,7 +50,8 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
     }
     return nullptr;
   }();
-  assert(fallback != nullptr); // There must be at least one fallback
+  if (fallback == nullptr)
+    throw std::runtime_error{"No loaded backends available."};

   // 1. Backend for All operations
   const backend::Backend *backend_all = resolveBackend(manual_options.backend_for_all, fallback);
@@ -110,7 +111,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
 const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
                                                         const backend::Backend *fallback)
 {
-  // Ensure if the backend is available in the backend
+  // Ensure that the backend is available in the current backend context
   const backend::Backend *backend = BackendManager::get().get(id);
   if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
   {
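schedule() above now fails fast with a runtime_error instead of asserting when none of the configured backends could be loaded. Condensed, the fallback selection is an immediately-invoked lambda returning the first loadable entry of backend_list; the loop body is elided in the patch, so the lookup shown inside it is an assumption:

    auto fallback = [&]() -> const backend::Backend * {
      for (auto backend_id : _options.backend_list)
      {
        if (const auto *backend = BackendManager::get().get(backend_id)) // assumed lookup
          return backend;
      }
      return nullptr; // nothing loadable
    }();
    if (fallback == nullptr)
      throw std::runtime_error{"No loaded backends available."};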
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
index 44496318f..f7f659e3e 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ b/runtime/onert/core/src/compiler/OperationValidator.cc
@@ -68,19 +68,6 @@
       [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
 }

-void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::AvgPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  if (_ctx.at(ofm_index).info().isDynamic())
-    return;
-
-  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
-  OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
 void OperationValidator::visit(const ir::operation::BatchMatMul &node)
 {
   const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
@@ -125,17 +112,6 @@ void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
   OP_REQUIRES(input_shape.C == output_shape.C);
 }

-void OperationValidator::visit(const ir::operation::Cast &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-
-  const auto input_index{node.getInputs().at(0)};
-
-  OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
 void OperationValidator::visit(const ir::operation::Comparison &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -177,6 +153,17 @@ void OperationValidator::visit(const ir::operation::InstanceNorm &node)
   OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
 }

+void OperationValidator::visit(const ir::operation::Pool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  if (_ctx.at(ofm_index).info().isDynamic())
+    return;
+
+  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+  OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
 void OperationValidator::visit(const ir::operation::Permute &node)
 {
   VERBOSE(Permute) << "Configure Permute operation" << std::endl;
@@ -298,8 +285,6 @@ void OperationValidator::visit(const ir::operation::RNN &node)
               num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
 }

-void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -353,6 +338,51 @@ void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
   OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
 }

+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
+{
+  checkUnaryOp(node);
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  OP_REQUIRES(node.getInputs().size() == 1);
+  OP_REQUIRES(node.getOutputs().size() == 1);
+
+  // Check if I/O types match
+  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
+  {
+    OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+  }
+  else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+  {
+    OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+  }
+  else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
+  {
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+  }
+
+  if (_ctx.at(output_index).info().isDynamic())
+    return;
+
+  OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
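The merged ElementwiseUnary validator above folds the old Cast, Dequantize and Quantize checks into one visitor. Its I/O type contract, written out as a standalone predicate for reference (a sketch; only the rules visible in the hunk are encoded):

    using Type = ir::operation::ElementwiseUnary::Type;
    bool ioTypesConsistent(Type op_type, ir::DataType in, ir::DataType out)
    {
      switch (op_type)
      {
        case Type::DEQUANTIZE: // quantized in, float out
          return in == ir::DataType::QUANT_UINT8_ASYMM && out == ir::DataType::FLOAT32;
        case Type::QUANTIZE: // float in, quantized out
          return in == ir::DataType::FLOAT32 && out == ir::DataType::QUANT_UINT8_ASYMM;
        case Type::CAST: // cast may change the type freely
          return true;
        default: // every other unary op must preserve the type
          return in == out;
      }
    }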
 void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -389,8 +419,6 @@ void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
   }
 }

-void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -405,8 +433,6 @@ void OperationValidator::visit(const ir::operation::ExpandDims &node)
   OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
 }

-void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::HashtableLookup &node)
 {
   const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
@@ -495,21 +521,6 @@ void OperationValidator::visit(const ir::operation::Gather &node)
   OP_REQUIRES(ofm_shape.rank() <= 4);
 }

-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-
-  const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
-  OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-  OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-  OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
-  OP_REQUIRES(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
-}
-
 void OperationValidator::visit(const ir::operation::DepthToSpace &node)
 {
   // param check
@@ -822,30 +833,6 @@ void OperationValidator::visit(const ir::operation::Pad &node)
   OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
 }

-void OperationValidator::visit(const ir::operation::Min &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  // This validator does not check shape. So checking isDynamic() is skipped.
-
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  // This validator does not check shape. So checking isDynamic() is skipped.
-
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
 void OperationValidator::visit(const ir::operation::Select &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -899,12 +886,6 @@ void OperationValidator::visit(const ir::operation::Split &node)
   OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
 }

-void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::Shape &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -961,12 +942,6 @@ void OperationValidator::visit(const ir::operation::While &node)
   // TODO Add to validate with subgraphs
 }

-void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::SquaredDifference &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1027,16 +1002,6 @@ void OperationValidator::visit(const ir::operation::Tile &node)
   OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
 }

-void OperationValidator::visit(const ir::operation::LogicalOr &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(0)};
-  const auto rhs_index{node.getInputs().at(1)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
 void OperationValidator::visit(const ir::operation::Range &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1084,24 +1049,5 @@ void OperationValidator::visit(const ir::operation::LogSoftmax &node)
   OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
 }

-void OperationValidator::visit(const ir::operation::Quantize &node)
-{
-  VERBOSE(Quantize) << "Configure Quantize operation" << std::endl;
-
-  OP_REQUIRES(node.getInputs().size() == 1);
-  OP_REQUIRES(node.getOutputs().size() == 1);
-
-  const auto input_index{node.getInputs().at(0)};
-  const auto output_index{node.getOutputs().at(0)};
-
-  OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
-
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-
-  OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
-  OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
 } // namespace compiler
 } // namespace onert
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
index b27e6863c..deb6357bb 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ b/runtime/onert/core/src/compiler/OperationValidator.h
@@ -44,58 +44,45 @@ public:
   void operator()();

 public:
-  void visit(const ir::operation::Abs &node) override;
-  void visit(const ir::operation::AvgPool2D &node) override;
   void visit(const ir::operation::BatchMatMul &node) override;
   void visit(const ir::operation::BatchToSpaceND &node) override;
-  void visit(const ir::operation::Cast &node) override;
   void visit(const ir::operation::Comparison &node) override;
   void visit(const ir::operation::Softmax &node) override;
   void visit(const ir::operation::InstanceNorm &node) override;
   void visit(const ir::operation::Permute &node) override;
+  void visit(const ir::operation::Pool2D &node) override;
   void visit(const ir::operation::Reduce &node) override;
   void visit(const ir::operation::Transpose &node) override;
   void visit(const ir::operation::RNN &node) override;
-  void visit(const ir::operation::Round &node) override;
   void visit(const ir::operation::SpaceToBatchND &node) override;
   void visit(const ir::operation::SpaceToDepth &node) override;
+  void visit(const ir::operation::ElementwiseActivation &node) override;
+  void visit(const ir::operation::ElementwiseBinary &node) override;
+  void visit(const ir::operation::ElementwiseUnary &node) override;
   void visit(const ir::operation::EmbeddingLookup &node) override;
-  void visit(const ir::operation::Exp &node) override;
   void visit(const ir::operation::ExpandDims &node) override;
-  void visit(const ir::operation::Floor &node) override;
   void visit(const ir::operation::HashtableLookup &node) override;
   void visit(const ir::operation::TransposeConv &node) override;
   void visit(const ir::operation::Gather &node) override;
-  void visit(const ir::operation::Dequantize &node) override;
   void visit(const ir::operation::DepthToSpace &node) override;
   void visit(const ir::operation::Pack &node) override;
   void visit(const ir::operation::LSTM &node) override;
   void visit(const ir::operation::L2Normalization &node) override;
   void visit(const ir::operation::Unpack &node) override;
   void visit(const ir::operation::Pad &node) override;
-  void visit(const ir::operation::Min &node) override;
-  void visit(const ir::operation::Max &node) override;
   void visit(const ir::operation::Select &node) override;
   void visit(const ir::operation::StridedSlice &node) override;
   void visit(const ir::operation::Split &node) override;
-  void visit(const ir::operation::Cos &node) override;
-  void visit(const ir::operation::Sin &node) override;
-  void visit(const ir::operation::RSQRT &node) override;
   void visit(const ir::operation::Shape &node) override;
   void visit(const ir::operation::ResizeBilinear &node) override;
   void visit(const ir::operation::Reverse &node) override;
   void visit(const ir::operation::If &node) override;
   void visit(const ir::operation::While &node) override;
-  void visit(const ir::operation::Neg &node) override;
-  void visit(const ir::operation::Log &node) override;
-  void visit(const ir::operation::LogicalNot &node) override;
   void visit(const ir::operation::SquaredDifference &node) override;
   void visit(const ir::operation::Tile &node) override;
-  void visit(const ir::operation::LogicalOr &node) override;
   void visit(const ir::operation::Range &node) override;
   void visit(const ir::operation::MatrixBandPart &node) override;
   void visit(const ir::operation::LogSoftmax &node) override;
-  void visit(const ir::operation::Quantize &node) override;

 private:
   void checkUnaryOp(const ir::Operation &node);
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
index 76c1edcbc..4eba1ff49 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInference.cc
@@ -25,6 +25,64 @@ namespace onert
 namespace compiler
 {

+bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
+{
+  bool has_dynamic_tensor = false;
+
+  for (const auto &operation_idx : op_seq.operations())
+  {
+    auto &op = _operations.at(operation_idx);
+    auto opcode = op.opcode();
+
+    _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+
+    // IF: need shape inference for then, else
+    // While: need shape inference for condition, body
+    if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+    {
+      op.accept(*this);
+    }
+    else
+    {
+      _return_has_dynamic_tensor = checkDynamicInput(op);
+
+      if (_return_has_dynamic_tensor)
+      {
+        setDynamicOutput(op);
+      }
+      else
+      {
+        op.accept(*this);
+      }
+    }
+
+    has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+  }
+
+  return has_dynamic_tensor;
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
+{
+  for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+  {
+    if (_operands.at(input_idx).info().isDynamic())
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
+{
+  for (auto output_idx : op.getOutputs())
+  {
+    _operands.at(output_idx).info().setDynamic();
+  }
+}
+
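infer() above hoists the dynamic-shape test out of the individual visit() methods, which is why the per-operation `isDynamic()` early-returns are deleted throughout the remainder of this file. The per-operation dispatch reduces to:

    // Sketch of the dispatch added above (If/While recurse into their
    // subgraphs, so they always get a full visit).
    if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
      op.accept(*this);
    else if (checkDynamicInput(op))
      setDynamicOutput(op); // dynamic input => dynamic outputs, no inference run
    else
      op.accept(*this);     // static path: run the op's shape inference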
 void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
                                                   const ir::OperandIndex lhs_idx,
                                                   const ir::OperandIndex rhs_idx)
@@ -35,13 +93,6 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (lhs.info().isDynamic() || rhs.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
   output.info().shape(new_shape);
@@ -56,14 +107,6 @@ void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape = input.info().shape();
   output.info().shape(new_shape);
@@ -99,17 +142,6 @@ void StaticShapeInferer::dump()
   }
 }

-void StaticShapeInferer::visit(const ir::operation::Abs &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Add &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
-                           op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
 void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
 {
   const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -118,15 +150,6 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);

@@ -145,35 +168,22 @@ void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
   const auto lhs = _operands.at(lhs_index);
   const auto rhs = _operands.at(rhs_index);
   auto &output = _operands.at(output_index);
-
-  if (lhs.info().isDynamic() || rhs.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
   output.info().shape(new_shape);
 }

-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
 {
-  const auto input_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::INPUT)};
-  const auto &input = _operands.at(input_idx);
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+                           op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}

+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  // if input is dynamic, output also becomes dynamic.
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
   const auto &shape = _operands.at(shape_idx);
@@ -192,11 +202,6 @@ void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
   output.info().shape(new_shape);
 }

-void StaticShapeInferer::visit(const ir::operation::Cast &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::Comparison &op)
 {
   handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -215,14 +220,6 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
   {
     const auto input_idx{op.getInputs().at(i)};
     const auto &input = _operands.at(input_idx);
-
-    if (input.info().isDynamic())
-    {
-      output.info().setDynamic();
-      _return_has_dynamic_tensor = true;
-      return;
-    }
-
     input_shapes.emplace_back(input.shape());
   }

@@ -241,33 +238,26 @@ void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (input.info().isDynamic() || ker.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape =
       shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
   output.info().shape(new_shape);
 }

-void StaticShapeInferer::visit(const ir::operation::Cos &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
 }

-void StaticShapeInferer::visit(const ir::operation::Div &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
 {
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
-                           op.getInputs().at(ir::operation::Div::Input::RHS));
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+                           op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
 }

-void StaticShapeInferer::visit(const ir::operation::Exp &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
 }
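This is the shape-inference side of the same IR consolidation seen in OperationValidator above: many dedicated nodes collapse into a few parameterized ones whose inference bodies were identical anyway. The grouping below is a summary inferred from the deletions in this diff, not part of the patch itself:

    // Former dedicated nodes              ->  consolidated node
    // Abs, Cast, Cos, Sin, Exp, Log, Neg,
    // Round, RSQRT, LogicalNot, ...       ->  ElementwiseUnary (op_type selects the function)
    // Logistic (and similar activations)  ->  ElementwiseActivation
    // Min, Max, LogicalOr                 ->  ElementwiseBinary
    // Add, Div, Mul (arithmetic)          ->  BinaryArithmetic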
 void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -279,13 +269,6 @@
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!axis.isConstant())
   {
     output.info().setDynamic();
@@ -310,13 +293,6 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!input.isConstant())
   {
     output.info().setDynamic();
@@ -345,15 +321,6 @@ void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-
-  // if input or ker is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || ker.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape =
       shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -376,15 +343,6 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)

   const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
   const auto &indices = _operands.at(indices_idx);
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || indices.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);

@@ -476,27 +434,6 @@ void StaticShapeInferer::visit(const ir::operation::If &op)
   }
 }

-void StaticShapeInferer::visit(const ir::operation::Log &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
-                           op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Logistic &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
 {
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
@@ -507,29 +444,6 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
 }

-void StaticShapeInferer::visit(const ir::operation::Max &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
-                           op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Min &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
-                           op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Mul &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
-                           op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Neg &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::OneHot &op)
 {
   const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
@@ -542,7 +456,7 @@
   auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (indice.info().isDynamic() || depth.info().isDynamic() || !depth.isConstant())
+  if (!depth.isConstant())
   {
     output.info().setDynamic();
     _return_has_dynamic_tensor = true;
@@ -558,18 +472,6 @@

 void StaticShapeInferer::visit(const ir::operation::Pack &op)
 {
-  bool is_any_of_inputs_dynamic = [&]() -> bool {
-    for
(uint32_t i = 0; i < op.getInputs().size(); ++i) - { - const auto &input = _operands.at(op.getInputs().at(i)); - if (input.info().isDynamic()) - { - return true; - } - } - return false; - }(); - const auto input_idx{op.getInputs().at(0)}; const auto &input = _operands.at(input_idx); @@ -577,14 +479,6 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (is_any_of_inputs_dynamic) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - const auto rank = input.shape().rank() + 1; const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); const auto num = op.param().num; @@ -608,14 +502,6 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic or pad is dynamic, output also becomes dynamic - if (input.info().isDynamic() || pad.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // if pad is not constant, output also becomes dynamic if (!pad.isConstant()) { @@ -638,13 +524,6 @@ void StaticShapeInferer::visit(const ir::operation::Permute &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // re-sizing output shape // Permute is a special operation that layouts of input/output may be different on backend // However, it is not applied here, so input/output have the same layout of frontend. Because @@ -672,13 +551,6 @@ void StaticShapeInferer::visit(const ir::operation::Range &op) // get mutable output operand const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if any input is dynamic, output also becomes dynamic - if (start_op.info().isDynamic() || limit_op.info().isDynamic() || delta_op.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } ir::Shape new_shape; if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant()) @@ -716,14 +588,6 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - std::vector<int32_t> axes_vec; for (size_t i = 0; i < axes.shape().num_elements(); ++i) { @@ -761,14 +625,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // New shape is given by second input tensor if (op.getInputs().size() == 2) { @@ -827,14 +683,6 @@ void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - 
return; - } - // Shape inferencing logic based on Params ir::Shape new_shape = shape_inference::inferResizeBilinearShape( input.shape(), op.param().height_out, op.param().width_out); @@ -852,16 +700,6 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT)); } -void StaticShapeInferer::visit(const ir::operation::Round &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::RSQRT &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::Input::INPUT)); -} - void StaticShapeInferer::visit(const ir::operation::Select &op) { const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)}; @@ -876,14 +714,6 @@ void StaticShapeInferer::visit(const ir::operation::Select &op) auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (input_cond.info().isDynamic() || input_true.info().isDynamic() || - input_false.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // Select output shape ir::Shape new_shape = shape_inference::inferSelectShape( input_cond.info().shape(), input_true.info().shape(), input_false.info().shape()); @@ -899,14 +729,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // re-sizing output shape ir::Shape output_shape; output_shape.append(input.info().shape().rank()); @@ -914,11 +736,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op) output.info().shape(output_shape); } -void StaticShapeInferer::visit(const ir::operation::Sin &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT)); -} - void StaticShapeInferer::visit(const ir::operation::Slice &op) { const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; @@ -930,13 +747,6 @@ void StaticShapeInferer::visit(const ir::operation::Slice &op) const auto output_index = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_index); - if (input.info().isDynamic() || begins.info().isDynamic() || sizes.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // Whether input is constant or not does not affect whether output is dynamic or not if (!(begins.isConstant() && sizes.isConstant())) { @@ -970,13 +780,6 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) const auto &block_shape = _operands.at(block_shape_idx); const auto &padding = _operands.at(padding_idx); - if (input.info().isDynamic() || block_shape.info().isDynamic() || padding.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // Whether input is constant or not does not affect whether output is dynamic or not if (!(block_shape.isConstant() && padding.isConstant())) { @@ -1006,18 +809,6 @@ void StaticShapeInferer::visit(const ir::operation::Split &op) const auto axis = op.param().axis; const auto num_splits = op.param().num_splits; - if (input.info().isDynamic()) - { - for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++) - { - const auto output_idx =
op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); - output.info().setDynamic(); - } - _return_has_dynamic_tensor = true; - return; - } - const auto rank = input.info().shape().rank(); auto axis_resolved = axis < 0 ? axis + rank : axis; @@ -1072,14 +863,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) const auto output_index = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_index); - if (input.info().isDynamic() || starts.info().isDynamic() || ends.info().isDynamic() || - strides.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - if (!(starts.isConstant() && ends.isConstant() && strides.isConstant())) { output.info().setDynamic(); @@ -1104,17 +887,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) output.info().shape(new_shape); } -void StaticShapeInferer::visit(const ir::operation::Sub &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS), - op.getInputs().at(ir::operation::Sub::Input::RHS)); -} - -void StaticShapeInferer::visit(const ir::operation::Tanh &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::Input::INPUT)); -} - void StaticShapeInferer::visit(const ir::operation::Tile &op) { const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)}; @@ -1126,13 +898,6 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - if (!multiplier.isConstant()) { output.info().setDynamic(); @@ -1158,13 +923,7 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op) ir::Operand &output = _operands.at(output_idx); const auto perm{op.param().perm}; // const auto rank{op.param().rank}; - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } + // set output shape, based on input and params ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm); output.info().shape(new_shape); @@ -1175,20 +934,6 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op) const auto input_idx{op.getInputs().at(0)}; const auto &input = _operands.at(input_idx); const auto num = op.param().num; - - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) - { - const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); - output.info().setDynamic(); - } - _return_has_dynamic_tensor = true; - return; - } - const auto rank = input.shape().rank(); const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); @@ -1346,11 +1091,6 @@ void StaticShapeInferer::visit(const ir::operation::While &op) } } -void StaticShapeInferer::visit(const ir::operation::ZerosLike &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::Input::INPUT)); -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h index c0a1ebc04..3b0360b4b 100644 --- a/runtime/onert/core/src/compiler/TensorBuilders.h +++ b/runtime/onert/core/src/compiler/TensorBuilders.h @@ -67,17 +67,6 @@ public: return _cf_tensor_builder; } - std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) - { - for (auto &tensor_builder : _tensor_builders) - { - auto tensor = tensor_builder->tensorAt(ind); - if (tensor) - return tensor; - } - return nullptr; - } - private: std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders; std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder; diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h new file mode 100644 index 000000000..8be87b081 --- /dev/null +++ b/runtime/onert/core/src/compiler/TensorRegistries.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__ +#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__ + +#include <unordered_set> +#include <memory> +#include "backend/BackendContext.h" +#include "backend/Backend.h" +#include "backend/controlflow/Config.h" +#include "backend/controlflow/TensorBuilder.h" +#include "backend/controlflow/TensorRegistry.h" + +namespace onert +{ +namespace compiler +{ + +class TensorRegistries +{ +public: + TensorRegistries() = default; + + TensorRegistries(const onert::backend::BackendContexts &backend_contexts, + bool include_controlflow) + { + for (const auto &e : backend_contexts) + { + auto tensor_reg = e.second->tensor_registry; + if (e.first->config()->id() == backend::controlflow::Config::ID) + { + _cf_tensor_reg = + std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg); + if (include_controlflow) + _tensor_regs.insert(tensor_reg); + } + else + { + _tensor_regs.insert(tensor_reg); + } + } + } + + std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator begin() const + { + return _tensor_regs.cbegin(); + } + std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator end() const + { + return _tensor_regs.cend(); + } + + std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const + { + return _cf_tensor_reg; + } + + std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const + { + for (auto &tensor_reg : _tensor_regs) + { + auto tensor = tensor_reg->getITensor(ind); + if (tensor) + return tensor; + } + return nullptr; + } + +private: + std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs; + std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg; +}; + +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TENSOR_REGISTRIES_H__
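The getITensor method above is a first-hit scan: ask each backend's registry in turn and return the first non-null tensor, or nullptr when no backend registered the index. A self-contained toy analogue of that lookup, with std::function standing in for ITensorRegistry and shared ints standing in for tensors; the types here are illustrative only, not onert's:

#include <cassert>
#include <functional>
#include <memory>
#include <vector>

// Each "registry" maps an operand index to a tensor handle; the aggregate
// returns the first non-null hit. In the real class the container is an
// unordered set, so presumably at most one backend registers a given index.
using Lookup = std::function<std::shared_ptr<int>(int)>;

std::shared_ptr<int> firstHit(const std::vector<Lookup> &registries, int index)
{
  for (const auto &reg : registries)
    if (auto tensor = reg(index))
      return tensor;
  return nullptr; // no backend knows this operand
}

int main()
{
  auto t = std::make_shared<int>(42);
  std::vector<Lookup> regs{
      [](int) { return std::shared_ptr<int>{}; },   // backend A: registered nothing
      [&](int i) { return i == 7 ? t : nullptr; }}; // backend B: owns operand 7
  assert(firstHit(regs, 7) == t);
  assert(firstHit(regs, 8) == nullptr);
}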
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc index 1742a0dd5..647669e46 100644 --- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc @@ -22,20 +22,20 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node) +void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) { const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index); const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index); const auto backend = op_seq_lower_info->backend(); const auto layout = op_seq_lower_info->layout(); - const auto factor = operand::PermuteFactor{backend, layout}; + const auto factor = ir::operand::PermuteFactor{backend, layout}; - for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); @@ -47,7 +47,7 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation auto new_object = object; new_object.unsetDef(); // TODO Remove const_cast - const_cast<OperationIndexSet &>(new_object.getUses()).clear(); + const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear(); const auto new_index = _graph.operands().emplace(new_object); _replace_operands_map[key] = new_index; } @@ -89,5 +89,5 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h index 3ea4dc397..052883c92 100644 --- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ -#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ +#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ #include <ir/operand/PermuteFactor.h> #include <ir/Index.h> @@ -25,7 +25,7 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -39,13 +39,13 @@ public: std::string id() final { return "ConstantInsertionPass"; } public: - void callback(const OperationIndex &index, Operation &node) final; + void callback(const ir::OperationIndex &index, ir::Operation &node) final; private: struct ReplaceKey { - OperandIndex index; - operand::PermuteFactor factor; + ir::OperandIndex index; + ir::operand::PermuteFactor factor; bool operator==(const ReplaceKey &other) const { @@ -61,15 +61,16 @@ private: std::size_t operator()(const ReplaceKey &key) const noexcept { using std::hash; - return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1); + return hash<ir::OperandIndex>()(key.index) ^ + (hash<ir::operand::PermuteFactor>()(key.factor) << 1); } }; - std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map; + std::unordered_map<ReplaceKey, ir::OperandIndex, KeyHasher> _replace_operands_map; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc index 04f4e59c0..1c1dbe0ee 100644 --- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc @@ -23,28 +23,28 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation &node) +void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) { const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index); const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index); const auto backend = op_seq_lower_info->backend(); const auto layout = op_seq_lower_info->layout(); - const auto factor = operand::PermuteFactor{backend, layout}; + const auto factor = ir::operand::PermuteFactor{backend, layout}; // Now this runtime does not support the node making output of operation as constant - for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); if (object.isConstant()) { // All constant operands are already assigned at each backend by ConstantInsertionPass.
So a // constant has `def` and `use` as the same PermuteFactor - _lowered_graph.setLowerInfo(input, std::make_unique<operand::LowerInfo>()); + _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>()); _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor); _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor); } @@ -52,5 +52,5 @@ void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h index 5c9f4352b..e17d776d1 100644 --- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__ -#define __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__ +#define __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__ #include <ir/Index.h> #include "LoweredOperationPass.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -36,11 +36,11 @@ public: std::string id() final { return "ConstantLoweringPass"; } public: - void callback(const OperationIndex &index, Operation &node) final; + void callback(const ir::OperationIndex &index, ir::Operation &node) final; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__ +#endif // __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h index eefb8ddfb..0c5f7d745 100644 --- a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h @@ -18,11 +18,11 @@ #define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__ #include "OperandPass.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -30,7 +30,7 @@ namespace pass class LoweredOperandPass : public OperandPass { public: - LoweredOperandPass(ir::LoweredGraph &lowered_graph) + LoweredOperandPass(compiler::LoweredGraph &lowered_graph) : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING @@ -39,14 +39,14 @@ public: virtual ~LoweredOperandPass() = default; std::string id() override = 0; - void callback(const OperandIndex &i, Operand &o) override = 0; + void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0; protected: - ir::LoweredGraph &_lowered_graph; + compiler::LoweredGraph &_lowered_graph; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert #endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h index 0138712d7..5c8569be2 100644 --- a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h @@ -18,11 +18,11 @@ #define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__ #include "OperationPass.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -30,7 +30,7 @@ namespace pass class LoweredOperationPass : public OperationPass 
{ public: - LoweredOperationPass(ir::LoweredGraph &lowered_graph) + LoweredOperationPass(LoweredGraph &lowered_graph) : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING @@ -39,14 +39,14 @@ public: virtual ~LoweredOperationPass() = default; std::string id() override = 0; - void callback(const OperationIndex &i, Operation &o) override = 0; + void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0; protected: - ir::LoweredGraph &_lowered_graph; + LoweredGraph &_lowered_graph; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert #endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc index 693a0f493..50c001c30 100644 --- a/runtime/onert/core/src/ir/pass/OperandPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc @@ -20,7 +20,7 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -28,9 +28,9 @@ namespace pass void OperandPass::run() { _graph.operands().iterate( - [&](const OperandIndex &index, Operand &object) { callback(index, object); }); + [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); }); } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/OperandPass.h b/runtime/onert/core/src/compiler/pass/OperandPass.h index 393060741..b094879c5 100644 --- a/runtime/onert/core/src/ir/pass/OperandPass.h +++ b/runtime/onert/core/src/compiler/pass/OperandPass.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__ -#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_OPERAND_PASS_H__ +#define __ONERT_COMPILER_PASS_OPERAND_PASS_H__ #include "Pass.h" #include "ir/Index.h" @@ -30,7 +30,7 @@ class Operand; namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -44,11 +44,11 @@ public: public: std::string id() override = 0; void run() override final; - virtual void callback(const OperandIndex &i, Operand &o) = 0; + virtual void callback(const ir::OperandIndex &i, ir::Operand &o) = 0; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__ +#endif // __ONERT_COMPILER_PASS_OPERAND_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc index 84b1da3ee..d7a55cb22 100644 --- a/runtime/onert/core/src/ir/pass/OperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc @@ -22,7 +22,7 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -30,9 +30,9 @@ namespace pass void OperationPass::run() { _graph.operations().iterate( - [&](const OperationIndex &index, Operation &node) { callback(index, node); }); + [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); }); } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h index 1733f87ed..ac4d818a2 100644 --- a/runtime/onert/core/src/ir/pass/OperationPass.h +++ b/runtime/onert/core/src/compiler/pass/OperationPass.h @@ -19,8 +19,8 @@ * @brief This file contains OperationPass class */ -#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__ -#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__ +#ifndef 
__ONERT_COMPILER_PASS_OPERATION_PASS_H__ +#define __ONERT_COMPILER_PASS_OPERATION_PASS_H__ #include "Pass.h" #include "ir/Index.h" @@ -35,7 +35,7 @@ class Operation; namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -62,7 +62,7 @@ public: * @param index is the index of a node in graph * @param node is the node in graph */ - virtual void callback(const OperationIndex &index, Operation &node) = 0; + virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0; /** * @brief Run the pass @@ -71,7 +71,7 @@ public: }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h index 1c6628f6f..3f356c337 100644 --- a/runtime/onert/core/src/ir/pass/Pass.h +++ b/runtime/onert/core/src/compiler/pass/Pass.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_PASS_H__ -#define __ONERT_GRAPH_PASS_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_PASS_H__ +#define __ONERT_COMPILER_PASS_PASS_H__ #include <string> @@ -24,12 +24,12 @@ namespace onert namespace ir { class Graph; -} // namespace ir +} // namespace compiler } // namespace onert namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -37,7 +37,7 @@ namespace pass class Pass { public: - Pass(Graph &graph) : _graph{graph} {} + Pass(ir::Graph &graph) : _graph{graph} {} virtual ~Pass() = default; public: @@ -45,11 +45,11 @@ public: virtual void run() = 0; protected: - Graph &_graph; + ir::Graph &_graph; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_PASS_H__ +#endif // __ONERT_COMPILER_PASS_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc index 2deccd40b..f01697034 100644 --- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc @@ -21,35 +21,33 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node) +void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node) { _op_ind = ind; node.accept(*this); }; -void PermutationEliminationPass::visit(const operation::Permute &node) +void PermutationEliminationPass::visit(const ir::operation::Permute &node) { auto in_operand = node.getInputs().at(0); auto out_operand = node.getOutputs().at(0); - // Check if two tensors are both portable - // TODO Make this general, this is just a workaround to check two tensors are portable + // Check if two tensors are both portable; if not, we can't eliminate the node { auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement(); auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement(); - auto in_backend_id = in_def_factor.backend()->config()->id(); - auto out_backend_id = out_def_factor.backend()->config()->id(); + auto in_config = in_def_factor.backend()->config(); + auto out_config = out_def_factor.backend()->config(); - // TODO Fix this workaround that removes only Permute between cpu and controlflow backend. - // This should be general.
- if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") || - (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID))) + // FIXME Supporting dynamic tensor does not exactly mean those are portable. + // It may need to have another config option for checking if each uses `IPortableTensor`. + if (!(in_config->supportDynamicTensor() && out_config->supportDynamicTensor())) return; } @@ -65,7 +63,7 @@ void PermutationEliminationPass::visit(const operation::Permute &node) if (!op_seq.getOutputs().contains(in_operand)) return; - // Update OpSequence/Operation edges and Operand edges + // Update OpSequence/ir::Operation edges and ir::Operand edges op_seq.replaceOutputs(in_operand, out_operand); for (auto op : op_seq.operations()) { @@ -106,8 +104,8 @@ void PermutationEliminationPass::visit(const operation::Permute &node) }); VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl; - VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl; - VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl; + VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl; + VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl; } else { @@ -145,11 +143,11 @@ void PermutationEliminationPass::visit(const operation::Permute &node) } VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl; - VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl; - VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl; + VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl; + VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl; } } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h index 614e44cd2..29daf1a82 100644 --- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ -#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#define __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__ #include "ir/OperationVisitor.h" #include "LoweredOperationPass.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -40,7 +40,7 @@ namespace pass * @note This is an optimization pass which means that everything should work fine even if this pass * was skipped. 
*/ -class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor +class PermutationEliminationPass : public LoweredOperationPass, public ir::OperationVisitor { public: using LoweredOperationPass::LoweredOperationPass; @@ -49,17 +49,17 @@ public: std::string id() final { return "PermutationEliminationPass"; } public: - void callback(const OperationIndex &i, Operation &n) final; + void callback(const ir::OperationIndex &i, ir::Operation &n) final; private: - void visit(const operation::Permute &) final; + void visit(const ir::operation::Permute &) final; private: ir::OperationIndex _op_ind; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index 3578af813..c83a72ada 100644 --- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -31,12 +31,12 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object) +void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object) { auto &&operand_li = _lowered_graph.getLowerInfo(index); assert(operand_li); @@ -48,10 +48,10 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje return; } - std::list<OperationIndex> permute_indexes; + std::list<ir::OperationIndex> permute_indexes; // Build a map for all necessary type of operands - std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index; + std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index; { assert(operand_li->def_factors().size() == 1); for (auto factor : operand_li->def_factors()) @@ -72,7 +72,7 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje // Update operations' input that uses this operand { - std::list<OperationIndex> remove_list; + std::list<ir::OperationIndex> remove_list; auto uses = object.getUses(); for (auto use : uses) @@ -121,8 +121,8 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje } } -OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index, - const operand::PermuteFactor &factor) +ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index, + const ir::operand::PermuteFactor &factor) { assert(!_graph.isBuildingPhase()); @@ -143,14 +143,14 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera auto output_backend = factor.backend(); // NOTE Permute may not have specific layout because the layout of input and output may be // different. 
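The NOTE above is why insertPermute leaves the Permute node's own layout as UNKNOWN; the permute kind itself, chosen a little further down from the input and output def-factor layouts, reduces to a three-way dispatch. A self-contained sketch of that dispatch follows; the COPY fallback mirrors what the elided else branch presumably returns, so treat it as an assumption:

#include <cassert>

enum class Layout { NHWC, NCHW, UNKNOWN };
enum class PermuteType { NHWC_TO_NCHW, NCHW_TO_NHWC, COPY };

// Only the two 4D layout swaps need a real permutation; anything else
// (same layout on both sides, or an unknown layout) degenerates to a copy.
PermuteType permuteTypeFor(Layout in, Layout out)
{
  if (in == Layout::NHWC && out == Layout::NCHW)
    return PermuteType::NHWC_TO_NCHW;
  if (in == Layout::NCHW && out == Layout::NHWC)
    return PermuteType::NCHW_TO_NHWC;
  return PermuteType::COPY; // assumed behavior of the branch not shown in this hunk
}

int main()
{
  assert(permuteTypeFor(Layout::NHWC, Layout::NCHW) == PermuteType::NHWC_TO_NCHW);
  assert(permuteTypeFor(Layout::NHWC, Layout::NHWC) == PermuteType::COPY);
}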
- const auto permute_node_layout = Layout::UNKNOWN; + const auto permute_node_layout = ir::Layout::UNKNOWN; // NOTE If one backend supports several layout, the backend must support Permute operation const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow(); if (input_backend == output_backend) { permute_node_backend = input_backend; } - const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout}; + const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout}; // Update LowerInfo of input operand auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index); @@ -158,7 +158,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera operand_lower_info->addUsePermuteFactor(permute_node_factor); // Update LowerInfo of output operand - auto out_operand_li = std::make_unique<operand::LowerInfo>(); + auto out_operand_li = std::make_unique<ir::operand::LowerInfo>(); // The input and output factors of all nodes will be the same except Permute. So Tensor's // allocators allocates memory using only the information of def permutation factor now. @@ -170,13 +170,13 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera // Insert permute operation to the graph const auto input_layout = input_factor.layout(); const auto output_layout = factor.layout(); - using Permute = operation::Permute; + using Permute = ir::operation::Permute; const auto permute_type = [&]() { - if (input_layout == Layout::NHWC && output_layout == Layout::NCHW) + if (input_layout == ir::Layout::NHWC && output_layout == ir::Layout::NCHW) { return Permute::Type::NHWC_TO_NCHW; } - else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC) + else if (input_layout == ir::Layout::NCHW && output_layout == ir::Layout::NHWC) { return Permute::Type::NCHW_TO_NHWC; } @@ -200,7 +200,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index); op_seq.setInputs(node.getInputs()); op_seq.setOutputs(node.getOutputs()); - _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>( + _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>( permute_node_backend, permute_node_layout)); } @@ -212,5 +212,5 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera return node_index; } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h index 6c30c6f12..758515385 100644 --- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__ -#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__ +#define __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__ #include "LoweredOperandPass.h" #include "compiler/BackendManager.h" -#include "ir/Operand.h" //for OperationIndex +#include "ir/Operand.h" #include "ir/operand/PermuteFactor.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -36,7 +36,7 @@ public: public: std::string id() override { return "PermutationInsertionPass"; } - void callback(const OperandIndex &index, Operand &object) override; + void callback(const ir::OperandIndex &index, ir::Operand &object) override; private: /** @@ -45,14 +45,14 @@ private: * @param operand_index is the target operand index for the insertion * @param factor is the output operand's backend type and layout * - * @return OperationIndex + * @return ir::OperationIndex */ - OperationIndex insertPermute(const OperandIndex &operand_index, - const operand::PermuteFactor &factor); + ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index, + const ir::operand::PermuteFactor &factor); }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc index 6eb412cf1..c5c95c726 100644 --- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc @@ -23,11 +23,13 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { +using namespace ir; + void PermutationOperationPass::callback(const OperationIndex &, Operation &node) { node.accept(*this); @@ -70,7 +72,7 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node) "operand used in more than one node"); // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or // a node to extend shape may be inserted in front of this operation - const_cast<ir::Shape &>(operand.shape()).extendRank(expanded_rank); + const_cast<Shape &>(operand.shape()).extendRank(expanded_rank); } } } @@ -134,7 +136,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index); _lowered_graph.setLowerInfo( next_op_seq_index, - std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout())); + std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout())); } } @@ -164,8 +166,8 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index); new_op_seq.setInputs(node.getInputs()); new_op_seq.setOutputs(node.getOutputs()); - _lowered_graph.setLowerInfo(new_op_seq_index, - std::make_unique<operation::LowerInfo>(backend, frontend_layout)); + _lowered_graph.setLowerInfo( + new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout)); } // Change PermuteFactors of operands of target node @@ -175,7 +177,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) const auto backend = op_seq_li->backend(); const operand::PermuteFactor removed_factor{backend, backend_layout}; const operand::PermuteFactor new_factor{backend, frontend_layout}; - for (const auto &input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED) { bool canRemove = true; for (const auto &use : _graph.operands().at(input).getUses()) @@ -227,17 +229,31 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) } } -void PermutationOperationPass::visit(const operation::Add &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::BinaryArithmetic &node) +{ + applyExpandRanks(node); +} -void PermutationOperationPass::visit(const operation::Concat &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::Concat &node) { applyExpandRanks(node); } -void PermutationOperationPass::visit(const operation::Comparison &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::Comparison &node) +{ + applyExpandRanks(node); +} -void PermutationOperationPass::visit(const operation::Div &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::ElementwiseBinary &node) +{ + applyExpandRanks(node); +} -void PermutationOperationPass::visit(const operation::FullyConnected &node) +void PermutationOperationPass::visit(const ir::operation::ElementwiseUnary &node) { - const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT); + applyExpandRanks(node); +} + +void PermutationOperationPass::visit(const ir::operation::FullyConnected &node) +{ + const auto &input_ind = node.getInputs().at(ir::operation::FullyConnected::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -247,9 +263,9 @@ void PermutationOperationPass::visit(const operation::FullyConnected &node) } } -void PermutationOperationPass::visit(const operation::Gather &node) +void PermutationOperationPass::visit(const ir::operation::Gather &node) { - const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT); + const auto 
&input_ind = node.getInputs().at(ir::operation::Gather::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -263,21 +279,9 @@ void PermutationOperationPass::visit(const operation::Gather &node) } } -void PermutationOperationPass::visit(const operation::LogicalAnd &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::LogicalNot &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::LogicalOr &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Max &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Min &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Mul &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Pack &node) +void PermutationOperationPass::visit(const ir::operation::Pack &node) { - const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT); + const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -291,11 +295,11 @@ void PermutationOperationPass::visit(const operation::Pack &node) } } -void PermutationOperationPass::visit(const operation::PReLU &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::PReLU &node) { applyExpandRanks(node); } -void PermutationOperationPass::visit(const operation::Reshape &node) +void PermutationOperationPass::visit(const ir::operation::Reshape &node) { - const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT); + const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -309,16 +313,14 @@ void PermutationOperationPass::visit(const operation::Reshape &node) } } -void PermutationOperationPass::visit(const operation::SquaredDifference &node) +void PermutationOperationPass::visit(const ir::operation::SquaredDifference &node) { applyExpandRanks(node); } -void PermutationOperationPass::visit(const operation::Sub &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Unpack &node) +void PermutationOperationPass::visit(const ir::operation::Unpack &node) { - const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT); + const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -333,5 +335,5 @@ void PermutationOperationPass::visit(const operation::Unpack &node) } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h new file mode 100644 index 000000000..2dd76b971 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__ +#define __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__ + +#include "ir/OperationVisitor.h" +#include "LoweredOperationPass.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +class PermutationOperationPass : public LoweredOperationPass, public ir::OperationVisitor +{ +public: + using LoweredOperationPass::LoweredOperationPass; + +public: + std::string id() final { return "PermutationOperationPass"; } + +public: + void callback(const ir::OperationIndex &i, ir::Operation &n) final; + +public: + void visit(const ir::operation::BinaryArithmetic &) final; + void visit(const ir::operation::Comparison &) final; + void visit(const ir::operation::Concat &) final; + void visit(const ir::operation::ElementwiseBinary &) final; + void visit(const ir::operation::ElementwiseUnary &) final; + void visit(const ir::operation::Pack &) final; + void visit(const ir::operation::PReLU &) final; + void visit(const ir::operation::SquaredDifference &) final; + void visit(const ir::operation::Unpack &) final; + void visit(const ir::operation::FullyConnected &) final; + void visit(const ir::operation::Gather &) final; + void visit(const ir::operation::Reshape &) final; + +private: + void applyExpandRanks(const ir::Operation &); + void changeToKeepLayout(const ir::Operation &); +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h index 668785a81..fdbca1642 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.h +++ b/runtime/onert/core/src/dumper/dot/DotDumper.h @@ -15,7 +15,7 @@ */ #include "ir/Graph.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__ #define __ONERT_DUMPER_DOT_DOT_DUMPER_H__ @@ -42,7 +42,7 @@ public: : _lowered_graph{nullptr}, _graph(graph), _level{level} { } - DotDumper(const ir::LoweredGraph *lowered_graph, Level level) + DotDumper(const compiler::LoweredGraph *lowered_graph, Level level) : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level} { } @@ -57,7 +57,7 @@ public: void dump(const std::string &tag); private: - const ir::LoweredGraph *_lowered_graph; + const compiler::LoweredGraph *_lowered_graph; const ir::Graph &_graph; Level _level; }; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index cb516b53a..a69ae9cdb 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -78,11 +78,13 @@ bool DataflowExecutor::noWaitingJobs() } DataflowExecutor::DataflowExecutor( - std::unique_ptr<ir::LoweredGraph> lowered_graph, + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map) - : 
ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders}, + const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs, + compiler::CodeMap &&code_map) + : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, + std::move(tensor_mgrs)}, _code_map{std::move(code_map)} { VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index aebb03c23..8d60e3e4b 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -49,10 +49,11 @@ public: * @param tensor_builders Tensor builders that are currently used * @param code_map OpSequence and its code map */ - DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, + DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInference.cc index 5ec7012ee..70bddfce4 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInference.cc +++ b/runtime/onert/core/src/exec/DynamicShapeInference.cc @@ -100,17 +100,6 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Abs &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::Add &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS), - op.getInputs().at(ir::operation::Add::Input::RHS)); -} - void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) { const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; @@ -155,6 +144,12 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op) dynamicTensorManagerOf(output)->applyShape(output_index, new_shape); } +void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS), + op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)); +} + void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op) { auto output_ind = op.getOutputs().at(0); @@ -179,11 +174,6 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Cast &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Comparison &op) { handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0), @@ -292,20 +282,20 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Cos &op) +void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op) { - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT)); + 
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT)); } -void DynamicShapeInferer::visit(const ir::operation::Div &op) +void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op) { - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS), - op.getInputs().at(ir::operation::Div::Input::RHS)); + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS), + op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)); } -void DynamicShapeInferer::visit(const ir::operation::Exp &op) +void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op) { - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT)); + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)); } void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op) @@ -430,27 +420,6 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Log &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::LogicalNot &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::LogicalOr &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0), - op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)); -} - -void DynamicShapeInferer::visit(const ir::operation::Logistic &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op) { handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT)); @@ -461,29 +430,6 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT)); } -void DynamicShapeInferer::visit(const ir::operation::Max &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS), - op.getInputs().at(ir::operation::Max::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Min &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS), - op.getInputs().at(ir::operation::Min::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Mul &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS), - op.getInputs().at(ir::operation::Mul::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Neg &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::OneHot &op) { auto output_ind = op.getOutputs().at(0); @@ -766,7 +712,7 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op) if (output_shape != output->getShape() || output->buffer() == nullptr) { // change on output shape - _dynamic_tensor_manager->applyShape(output_ind, output_shape); + dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); } assert(output->buffer() != nullptr); } @@ -776,16 +722,6 @@ void DynamicShapeInferer::visit(const ir::operation::Reverse &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT)); } -void DynamicShapeInferer::visit(const 
ir::operation::Round &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::RSQRT &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Select &op) { const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION); @@ -836,11 +772,6 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Sin &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Slice &op) { const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; @@ -1003,17 +934,6 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Sub &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS), - op.getInputs().at(ir::operation::Sub::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Tanh &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Tile &op) { auto output_ind = op.getOutputs().at(0); @@ -1091,10 +1011,5 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op) } } -void DynamicShapeInferer::visit(const ir::operation::ZerosLike &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::INPUT)); -} - } // namespace exec } // namespace onert diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc index 5b401ecf8..7feb3ab68 100644 --- a/runtime/onert/core/src/exec/Execution.cc +++ b/runtime/onert/core/src/exec/Execution.cc @@ -38,7 +38,10 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_ if (_io_desc.inputs.at(index.value()) != 0) throw std::runtime_error("Error in calling order"); - _io_desc.input_shape_signature[index] = new_shape; + // This will be used later to set the input tensor dynamic + // Note that the 'compiled' model will not be updated with new_shape; + // new_shape changes the model's input shape only while 'running' the model + _io_desc.dynamic_input_shapes[index] = new_shape; } // TODO Remove default parameter @@ -54,8 +57,8 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le // if input_shape_sig is set, input_shape_sig overrides shape in info // note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo() { - auto input_shape_sig = _io_desc.input_shape_signature.find(index); - auto size_required = (input_shape_sig != _io_desc.input_shape_signature.end()) + auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index); + auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end()) ? 
input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type()) : info.total_size(); @@ -154,8 +157,8 @@ bool Execution::isFinished(void) const { return finished; } ir::Shape Execution::getInputShape(ir::IOIndex ind) const { - auto itr = _io_desc.input_shape_signature.find(ind); - if (itr == _io_desc.input_shape_signature.end()) + auto itr = _io_desc.dynamic_input_shapes.find(ind); + if (itr == _io_desc.dynamic_input_shapes.end()) { auto operand_idx = primary_subgraph().getInputs().at(ind.value()); return primary_subgraph().operands().at(operand_idx).shape(); diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index 864ccb31a..f835a9675 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -26,12 +26,14 @@ namespace onert namespace exec { -ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, +ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders) + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs) : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex() + _input_tensors{input_tensors}, _output_tensors{output_tensors}, + _tensor_mgrs{std::move(tensor_mgrs)}, _mutex() { // TODO Fix the way of knowing whether it is primary or not bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty()); @@ -41,23 +43,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, std::vector<std::shared_ptr<backend::ITensor>> list; for (auto ind : ind_seq) { - std::shared_ptr<backend::ITensor> tensor; - for (auto &tensor_builder : tensor_builders) - { - auto tensor_registry = tensor_builder->tensorRegistry(); - assert(tensor_registry); - tensor = tensor_registry->getNativeITensor(ind); - if (tensor != nullptr) - { - if (tensor_builder->supportDynamicTensor()) - { - DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()}; - _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - } - break; - } - } + std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind); assert(tensor != nullptr); + DynAllocInfo dyn_alloc_info{ind}; + _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); list.push_back(tensor); } return list; @@ -66,23 +55,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, std::vector<std::shared_ptr<backend::ITensor>> list; for (auto ind : ind_seq) { - std::shared_ptr<backend::ITensor> tensor; - for (auto &tensor_builder : tensor_builders) - { - auto tensor_registry = tensor_builder->tensorRegistry(); - assert(tensor_registry); - tensor = tensor_registry->getNativeITensor(ind); - if (tensor != nullptr) - { - if (tensor_builder->supportDynamicTensor()) - { - DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()}; - _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - } - break; - } - } + std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind); assert(tensor != nullptr); + DynAllocInfo dyn_alloc_info{ind}; + _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); list.push_back(tensor); } return list; @@ -92,42 +68,23 @@ 
ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, } else { - // If primary graph, all the inputs and outputs belong to controlflow backend - auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder(); - assert(cf_dyn_tensor_builder); - assert(input_tensors.size() == _graph.getInputs().size()); assert(output_tensors.size() == _graph.getOutputs().size()); for (uint32_t i = 0; i < input_tensors.size(); i++) { auto tensor = input_tensors[i]; auto ind = _graph.getInputs().at(i); - DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()}; + DynAllocInfo dyn_alloc_info{ind}; _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); } for (uint32_t i = 0; i < output_tensors.size(); i++) { auto tensor = output_tensors[i]; auto ind = _graph.getOutputs().at(i); - DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()}; + DynAllocInfo dyn_alloc_info{ind}; _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); } } - - // Prepare each TensorManager on each backend - for (auto &tensor_builder : tensor_builders) - { - auto s_tensor_manager = tensor_builder->releaseStaticTensorManager(); - if (s_tensor_manager != nullptr) - _tensor_mgrs.insert(std::move(s_tensor_manager)); - - if (tensor_builder->supportDynamicTensor()) - { - auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager(); - if (d_tensor_manager != nullptr) - _tensor_mgrs.insert(std::move(d_tensor_manager)); - } - } } void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors, @@ -192,8 +149,8 @@ void ExecutorBase::execute(const IODescription &desc) // TODO Remove dynamic_cast auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]); assert(tensor); - auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i}); - if (input_shape != desc.input_shape_signature.end()) + auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i}); + if (input_shape != desc.dynamic_input_shapes.end()) { tensor->set_dynamic(); tensor->setShape(input_shape->second); @@ -258,8 +215,8 @@ void ExecutorBase::execute(const IODescription &desc) */ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc) { - auto shape_sig_found = desc.input_shape_signature.find(io_ind); - if (shape_sig_found != desc.input_shape_signature.end()) + auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind); + if (shape_sig_found != desc.dynamic_input_shapes.end()) { auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]); if (dyn_alloc_info == _input_to_dyn_alloc_info.end()) @@ -269,7 +226,9 @@ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescript auto changed_input_shape = shape_sig_found->second; auto operand_ind = dyn_alloc_info->second.ind; - dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape); + auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager(); + assert(dyn_tensor_manager); + dyn_tensor_manager->applyShape(operand_ind, changed_input_shape); } } diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index 080c9bbdd..a13be7dbf 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -25,7 +25,7 @@ #include "Sink.h" #include "ShapeConverter.h" #include "exec/IExecutor.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #include 
"ir/LowerInfoMap.h" #include "backend/IConfig.h" #include "backend/Backend.h" @@ -33,9 +33,8 @@ #include "exec/IFunction.h" #include "backend/IDynamicTensorManager.h" #include "backend/ITensorManager.h" -#include "backend/ITensorBuilder.h" #include "exec/ExecutionObservee.h" -#include "compiler/TensorBuilders.h" +#include "compiler/TensorRegistries.h" #include <list> namespace onert @@ -51,10 +50,11 @@ public: * @param graph Graph object * @param tensor_builders Tensor builders that are currently used */ - ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, + ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders); + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs); virtual ~ExecutorBase() = default; @@ -102,7 +102,7 @@ protected: protected: ExecutionObservee _subject; std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; - std::unique_ptr<ir::LoweredGraph> _lowered_graph; + std::unique_ptr<compiler::LoweredGraph> _lowered_graph; const ir::Graph &_graph; std::vector<std::shared_ptr<backend::ITensor>> _input_tensors; std::vector<std::shared_ptr<backend::ITensor>> _output_tensors; diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc index d413e8162..fb31f7582 100644 --- a/runtime/onert/core/src/exec/FunctionSequence.cc +++ b/runtime/onert/core/src/exec/FunctionSequence.cc @@ -28,7 +28,8 @@ namespace exec void FunctionSequence::run() { - if (_enable_dynamic_shape_inferer) + // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false + if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx) { if (_dynamic_tensor_ctx->op_seq->size() != _functions.size()) throw std::runtime_error("operation and functions should be mapped one by one"); diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 5c099bc16..c224d3f4f 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -46,12 +46,14 @@ public: * @param tensor_builders Tensor builders that are currently used * @param code_map OpSequence and its code map */ - LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, + LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map, + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map, const std::vector<ir::OpSequenceIndex> &order) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders} + : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, + std::move(tensor_mgrs)} { for (auto index : order) { diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc index b5d81778f..ab234aacd 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.cc +++ b/runtime/onert/core/src/exec/ParallelExecutor.cc @@ -60,12 +60,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id) } ParallelExecutor::ParallelExecutor( - 
std::unique_ptr<ir::LoweredGraph> lowered_graph, + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map) - : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders, - std::move(code_map)} + const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs, + compiler::CodeMap &&code_map) + : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, + std::move(tensor_mgrs), std::move(code_map)} { VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; } diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 462cbc6a8..929edfce9 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -50,10 +50,11 @@ public: * @param tensor_builders Tensor builders that are currently used * @param code_map OpSequence and its code map */ - ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, + ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h index 48642d8ef..7be9df4d5 100644 --- a/runtime/onert/core/src/exec/feature/nchw/Reader.h +++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h @@ -33,7 +33,7 @@ namespace feature namespace nchw { -template <typename T> class Reader final : public feature::Reader<T> +template <typename T> class Reader : public feature::Reader<T> { public: // Construct for buffer of model inputs @@ -68,15 +68,14 @@ public: } public: - T at(uint32_t ch, uint32_t row, uint32_t col) const override + T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const final { - const auto offset = feature_index_to_byte_offset(0, ch, row, col); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; + return getRef(batch, ch, row, col); } - T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override + T at(uint32_t ch, uint32_t row, uint32_t col) const final { return getRef(0, ch, row, col); } + +protected: + const T &getRef(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const { const auto offset = feature_index_to_byte_offset(batch, ch, row, col); diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h index ff55de199..dbaf1a91e 100644 --- a/runtime/onert/core/src/exec/feature/nchw/View.h +++ b/runtime/onert/core/src/exec/feature/nchw/View.h @@ -17,7 +17,7 @@ #ifndef __ONERT_EXEC_FEATURE_NCHW_VIEW_H__ #define __ONERT_EXEC_FEATURE_NCHW_VIEW_H__ -#include "../Reader.h" +#include "Reader.h" #include "backend/ITensor.h" #include "ir/Shape.h" @@ -34,99 +34,31 @@ namespace feature namespace nchw { -template <typename T> class View final : public feature::Reader<T> +template <typename T> class View final : public Reader<T> { public: // 
Construct for buffer of model inputs - View(const ir::FeatureShape &shape, T *ptr, size_t len) - : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len} + View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len} { - assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); - - _strides.W = sizeof(T); - _strides.H = shape.W * sizeof(T); - _strides.C = shape.W * shape.H * sizeof(T); - _strides.N = shape.W * shape.H * shape.C * sizeof(T); + // DO NOTHING } // Construct for backend tensor - View(::onert::backend::ITensor *tensor) - : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} - { - assert(tensor->layout() == ir::Layout::NCHW); - - const auto start_offset = tensor->calcOffset({0, 0, 0, 0}); - _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; - _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; - _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; - _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; - - _shape.W = tensor->dimension(3); - _shape.H = tensor->dimension(2); - _shape.C = tensor->dimension(1); - _shape.N = tensor->dimension(0); - } - -public: - T at(uint32_t ch, uint32_t row, uint32_t col) const override + View(::onert::backend::ITensor *tensor) : Reader<T>{tensor} { - const auto offset = feature_index_to_byte_offset(0, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; - } - T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override - { - const auto offset = feature_index_to_byte_offset(batch, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; + // DO NOTHING } public: - T &at(uint32_t ch, uint32_t row, uint32_t col) - { - const auto offset = feature_index_to_byte_offset(0, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; - } + using Reader<T>::at; T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto offset = feature_index_to_byte_offset(batch, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; + return const_cast<T &>(Reader<T>::getRef(batch, ch, row, col)); } - -private: - size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const + T &at(uint32_t ch, uint32_t row, uint32_t col) { - assert(1u * _shape.N > batch); // shape.N > batch - assert(1u * _shape.C > ch); // shape.C > ch - assert(1u * _shape.H > row); // shape.H > row - assert(1u * _shape.W > col); // shape.W > col - - uint32_t res = 0; - res += batch * _strides.N; - res += ch * _strides.C; - res += row * _strides.H; - res += col * _strides.W; - - return res; + return const_cast<T &>(Reader<T>::getRef(0, ch, row, col)); } - -private: - // TODO Remove _shape - ir::FeatureShape _shape; - using Strides = ir::FeatureShape; - Strides _strides; - uint8_t *_ptr; - size_t _len; }; } // namespace nchw diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h index ef27992c3..7730cee72 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h +++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h @@ -34,7 +34,7 @@ namespace feature namespace nhwc { -template <typename T> class Reader final : public feature::Reader<T> +template <typename T> class Reader : public feature::Reader<T> { public: // Construct for 
buffer of model inputs @@ -70,15 +70,14 @@ public: } public: - T at(uint32_t row, uint32_t col, uint32_t ch) const override + T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const final { - const auto offset = feature_index_to_byte_offset(0, row, col, ch); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; + return getRef(batch, row, col, ch); } - T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override + T at(uint32_t row, uint32_t col, uint32_t ch) const final { return getRef(0, row, col, ch); } + +protected: + const T &getRef(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const { const auto offset = feature_index_to_byte_offset(batch, row, col, ch); diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h index a09961a84..72c8c3415 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/View.h +++ b/runtime/onert/core/src/exec/feature/nhwc/View.h @@ -35,101 +35,31 @@ namespace feature namespace nhwc { -template <typename T> class View final : public feature::Reader<T> +template <typename T> class View final : public Reader<T> { public: // Construct for buffer of model inputs - View(const ir::FeatureShape &shape, T *ptr, size_t len) - : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len} + View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len} { - UNUSED_RELEASE(len); // Workaround for unused variable in release mode - assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len); - - // No padding - _strides.C = sizeof(T); - _strides.W = shape.C * sizeof(T); - _strides.H = shape.C * shape.W * sizeof(T); - _strides.N = shape.C * shape.W * shape.H * sizeof(T); + // DO NOTHING } // Construct for backend tensor - View(backend::ITensor *tensor) - : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} + View(backend::ITensor *tensor) : Reader<T>{tensor} { - assert(tensor->layout() == ir::Layout::NHWC); - - const auto start_offset = tensor->calcOffset({0, 0, 0, 0}); - _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; - _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; - _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; - _strides.N = tensor->dimension(0) == 1 ? 
0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; - - _shape.C = tensor->dimension(3); - _shape.W = tensor->dimension(2); - _shape.H = tensor->dimension(1); - _shape.N = tensor->dimension(0); + // DO NOTHING } public: - T at(uint32_t row, uint32_t col, uint32_t ch) const override - { - const auto offset = feature_index_to_byte_offset(0, row, col, ch); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; - } - T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override - { - const auto offset = feature_index_to_byte_offset(batch, row, col, ch); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; - } - - T &at(uint32_t row, uint32_t col, uint32_t ch) - { - const auto offset = feature_index_to_byte_offset(0, row, col, ch); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; - } - + using Reader<T>::at; T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) { - const auto offset = feature_index_to_byte_offset(batch, row, col, ch); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; + return const_cast<T &>(Reader<T>::getRef(batch, row, col, ch)); } - -private: - size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const + T &at(uint32_t row, uint32_t col, uint32_t ch) { - assert(1u * _shape.N > batch); // shape.N > batch - assert(1u * _shape.H > row); // shape.H > row - assert(1u * _shape.W > col); // shape.W > col - assert(1u * _shape.C > ch); // shape.C > ch - - uint32_t res = 0; - res += batch * _strides.N; - res += row * _strides.H; - res += col * _strides.W; - res += ch * _strides.C; - - return res; + return const_cast<T &>(Reader<T>::getRef(0, row, col, ch)); } - -private: - // TODO Remove _shape - ir::FeatureShape _shape; - using Strides = ir::FeatureShape; - Strides _strides; - uint8_t *_ptr; - size_t _len; }; } // namespace nhwc diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst index 5f646b83f..0714df38a 100644 --- a/runtime/onert/core/src/interp/InterpOps.lst +++ b/runtime/onert/core/src/interp/InterpOps.lst @@ -22,43 +22,32 @@ // // Same list with Operations.lst // Make comment out if operation is not supported in interpreter -INTERP_OP(Add) -INTERP_OP(Sub) +INTERP_OP(BinaryArithmetic) //INTERP_OP(BatchToSpaceND) //INTERP_OP(Cast) INTERP_OP(Conv2D) INTERP_OP(DepthwiseConv2D) -INTERP_OP(AvgPool2D) -INTERP_OP(MaxPool2D) +INTERP_OP(Pool2D) INTERP_OP(Concat) INTERP_OP(FullyConnected) //INTERP_OP(Reduce) INTERP_OP(Reshape) -INTERP_OP(Mul) INTERP_OP(Softmax) //INTERP_OP(Squeeze) //INTERP_OP(Slice) //INTERP_OP(StridedSlice) -INTERP_OP(Tanh) -INTERP_OP(Logistic) -//INTERP_OP(Div) +INTERP_OP(ElementwiseActivation) //INTERP_OP(Transpose) //INTERP_OP(Exp) //INTERP_OP(Comparison) -//INTERP_OP(LogicalAnd) -//INTERP_OP(LogicalOr) //INTERP_OP(LogicalNot) //INTERP_OP(LSTM) //INTERP_OP(RSQRT) -INTERP_OP(ReLU) //INTERP_OP(ResizeBilinear) -INTERP_OP(ReLU1) -INTERP_OP(ReLU6) //INTERP_OP(RNN) //INTERP_OP(Floor) //INTERP_OP(SpaceToBatchND) //INTERP_OP(SpaceToDepth) -//INTERP_OP(L2Pool2D) //INTERP_OP(EmbeddingLookup) //INTERP_OP(L2Normalization) //INTERP_OP(HashtableLookup) @@ -81,6 +70,4 @@ INTERP_OP(Gather) INTERP_OP(Pad) //INTERP_OP(Custom) //INTERP_OP(Permute) -//INTERP_OP(Min) -//INTERP_OP(Max) //INTERP_OP(OneHot) diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc index 44c955421..86e883524 100644 --- 
a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc +++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc @@ -19,9 +19,7 @@ #include "OperationUtil.h" #include "interp/Registration.h" -#include "ir/operation/Add.h" -#include "ir/operation/Sub.h" -#include "ir/operation/Mul.h" +#include "ir/operation/BinaryArithmetic.h" #include "misc/polymorphic_downcast.h" #include "cker/Types.h" @@ -39,12 +37,13 @@ enum class OpType MUL }; -template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node) +void prepare(ExecEnv *env, const ir::Operation &node) { - const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + const auto &arithmetic_node = + nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node); - const auto lhs_index = node.getInputs().at(add_node.LHS); - const auto rhs_index = node.getInputs().at(add_node.RHS); + const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); + const auto rhs_index = node.getInputs().at(arithmetic_node.RHS); const auto out_index = node.getOutputs().at(0); const auto lhs_tensor = env->tensorAt(lhs_index); @@ -54,7 +53,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation // TODO Util function to compare TensorInfo if (lhs_tensor->data_type() != rhs_tensor->data_type()) { - throw std::runtime_error{"Interp(Add): Different input types"}; + throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"}; } bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()); @@ -65,7 +64,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation rhs_tensor->tensorInfo().shape(), success); if (!success) { - throw std::runtime_error{"Interp(Add): Failed to broadcast"}; + throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Failed to broadcast"}; } auto output_info = @@ -86,7 +85,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation // TODO Util function to compare TensorInfo if (lhs_tensor->data_type() != out_tensor->data_type()) { - throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"}; + throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"}; } } @@ -103,9 +102,9 @@ inline void setActivationParams(int32_t min, int32_t max, params->quantized_activation_max = max; } -template <typename raw_type, typename param_type, OpType op_type> +template <typename raw_type, OpType op_type> void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor, - const param_type &param) + const ir::operation::BinaryArithmetic::Param &param) { const auto lhs_buffer = lhs_tensor->bufferRO(); const auto rhs_buffer = rhs_tensor->bufferRO(); @@ -146,13 +145,11 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor out_shape, out_ptr); } -template <typename node_type, typename param_type, OpType op_type> -void invokeAdd(const ExecEnv *env, const ir::Operation &node) +template <OpType op_type> +void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node) { - const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); - - const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); - const auto rhs_index = node.getInputs().at(arithmetic_node.RHS); + const auto lhs_index = node.getInputs().at(node.LHS); + const auto rhs_index = node.getInputs().at(node.RHS); const auto out_index = 
node.getOutputs().at(0); const auto lhs_tensor = env->tensorAt(lhs_index); const auto rhs_tensor = env->tensorAt(rhs_index); @@ -161,38 +158,46 @@ void invokeAdd(const ExecEnv *env, const ir::Operation &node) if (data_type == ir::DataType::INT32) { - invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, - arithmetic_node.param()); + invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param()); } else if (data_type == ir::DataType::FLOAT32) { - invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param()); + invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param()); } else { throw std::runtime_error{"NYI: Unsupported data type"}; } } -} // namespace -OpKernel *getAdd() +void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node) { - static OpKernel kernel = {prepareAdd<ir::operation::Add>, - invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>}; - return &kernel; -} + const auto &arithmetic_node = + nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node); -OpKernel *getSub() -{ - static OpKernel kernel = {prepareAdd<ir::operation::Sub>, - invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>}; - return &kernel; + switch (arithmetic_node.param().arithmetic_type) + { + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node); + break; + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node); + break; + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node); + break; + default: + throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " + + arithmetic_node.name()}; + break; + } } -OpKernel *getMul() +} // namespace + +OpKernel *getBinaryArithmetic() { - static OpKernel kernel = {prepareAdd<ir::operation::Mul>, - invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>}; + static OpKernel kernel = {prepare, invokeBinaryArithmeticOps}; return &kernel; } diff --git a/runtime/onert/core/src/interp/operations/UnaryActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc index ea5e2417b..c8773bef4 100644 --- a/runtime/onert/core/src/interp/operations/UnaryActivations.cc +++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc @@ -20,10 +20,11 @@ #include "interp/Registration.h" -#include "ir/operation/ReLU.h" -#include "ir/operation/ReLU1.h" -#include "ir/operation/ReLU6.h" -#include "ir/operation/Tanh.h" +#include "ir/operation/ElementwiseActivation.h" + +#include <misc/polymorphic_downcast.h> +#include <cker/operation/Logistic.h> +#include <cker/operation/Tanh.h> namespace onert { @@ -34,9 +35,8 @@ namespace enum class ActivationType { + Logistic, ReLU, - ReLU1, - ReLU6, Tanh }; @@ -65,30 +65,25 @@ void prepare(ExecEnv *env, const ir::Operation &node) // TODO Util function to compare TensorInfo if (input_tensor->data_type() != output_tensor->data_type()) { - throw std::runtime_error{"Interp(Activations): Invalid output type"}; + throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"}; } } template <ActivationType act_type> -void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements) +void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha, + float beta) { std::function<float(const float &)> fn = 
[](const float &) { return std::nanf(""); }; switch (act_type) { case ActivationType::ReLU: - fn = [](const float &in) { return std::max(0.f, in); }; - break; - case ActivationType::ReLU1: - fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); }; - break; - case ActivationType::ReLU6: - fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); }; + fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); }; break; case ActivationType::Tanh: fn = [](const float &in) { return std::tanh(in); }; break; default: - throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"}; + throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"}; break; } @@ -114,38 +109,51 @@ template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Ope uint64_t elements = input_tensor->num_elements(); const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO()); float *out = reinterpret_cast<float *>(output_tensor->buffer()); - - evalFloat<act_type>(input_start, out, elements); + if (act_type == ActivationType::Logistic) + { + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out); + } + else + { + const auto &act_node = + nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node); + evalFloat<act_type>(input_start, out, elements, act_node.param().alpha, + act_node.param().beta); + } } else { - throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"}; + throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"}; } } -} // namespace - -OpKernel *getReLU() +void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node) { - static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>}; - return &kernel; -} - -OpKernel *getReLU1() -{ - static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>}; - return &kernel; + const auto &act_node = + nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node); + switch (act_node.param().op_type) + { + case ir::operation::ElementwiseActivation::Type::LOGISTIC: + invoke<ActivationType::Logistic>(env, node); + break; + case ir::operation::ElementwiseActivation::Type::RELU: + invoke<ActivationType::ReLU>(env, node); + break; + case ir::operation::ElementwiseActivation::Type::TANH: + invoke<ActivationType::Tanh>(env, node); + break; + default: + throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation"); + } } -OpKernel *getReLU6() -{ - static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>}; - return &kernel; -} +} // namespace -OpKernel *getTanh() +OpKernel *getElementwiseActivation() { - static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>}; + static OpKernel kernel = {prepare, invokeElementwiseActivation}; return &kernel; } diff --git a/runtime/onert/core/src/interp/operations/Logistic.cc b/runtime/onert/core/src/interp/operations/Logistic.cc deleted file mode 100644 index c23cbb782..000000000 --- a/runtime/onert/core/src/interp/operations/Logistic.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/Logistic.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Logistic.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareLogistic(ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(0); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - - const auto output_info = env->graph().operands().at(output_index).info(); - - // Check shape and type lhs is same with rhs - // TODO Util function to compare TensorInfo - if (output_info.total_size() == 0) - { - throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; - } - else - { - env->allocateIfNeeded(output_index, output_info); - } - - const auto output_tensor = env->tensorAt(output_index); - if (input_tensor->data_type() != output_tensor->data_type()) - { - throw std::runtime_error{"Interp(Logistic): Invalid output type"}; - } -} - -void invoke(const ITensor *input_tensor, const ITensor *output_tensor) -{ - const auto input_buffer = input_tensor->bufferRO(); - auto output_buffer = output_tensor->buffer(); - - const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); - const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); - const float *input_ptr = reinterpret_cast<const float *>(input_buffer); - float *output_ptr = reinterpret_cast<float *>(output_buffer); - - nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr); -} - -void invokeLogistic(const ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(0); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - const auto output_tensor = env->tensorAt(output_index); - - const auto data_type = input_tensor->data_type(); - - if (data_type == ir::DataType::FLOAT32) - { - invoke(input_tensor, output_tensor); - } - else - { - throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"}; - } -} -} // namespace - -OpKernel *getLogistic() -{ - static OpKernel kernel = {prepareLogistic, invokeLogistic}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/MaxPool2D.cc b/runtime/onert/core/src/interp/operations/MaxPool2D.cc deleted file mode 100644 index 313948fb6..000000000 --- a/runtime/onert/core/src/interp/operations/MaxPool2D.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/MaxPool.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/MaxPool2D.h" -#include "util/Utils.h" -#include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - - assert(in_tensor->num_dimensions() == 4); - UNUSED_RELEASE(in_tensor); - - const auto output_info = env->graph().operands().at(out_index).info(); - if (output_info.total_size() == 0) - { - // Handle unspecified output shape - const auto &maxpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); - const auto infered_output_shape = - shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param()); - env->allocateIfNeeded( - out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); - } - else - { - env->allocateIfNeeded(out_index, output_info); - } - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Handle same ifm & ofm data type only - assert(in_tensor->data_type() == out_tensor->data_type()); - assert(out_tensor->num_dimensions() == 4); -} - -void invoke(const ITensor *in_tensor, const ITensor *out_tensor, - const ir::operation::MaxPool2D::Param &param) -{ - // TODO support NCHW frontend - const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto padding = - ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh); - // Calculate - nnfw::cker::PoolParams cker_param; - calculateActivationRange(param.activation, &cker_param.float_activation_min, - &cker_param.float_activation_max); - cker_param.filter_width = param.kw; - cker_param.filter_height = param.kh; - cker_param.padding_values.width = padding.left; - cker_param.padding_values.height = padding.top; - cker_param.stride_width = param.stride.horizontal; - cker_param.stride_height = param.stride.vertical; - - const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); - float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); - - nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr); -} - -void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node) -{ - const auto &maxpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); - - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - const auto out_tensor = env->tensorAt(out_index); - - const auto data_type = in_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(in_tensor, out_tensor, maxpool_node.param()); - } - else - { - throw std::runtime_error{"NYI: Support float32 only"}; - } -} -} // namespace - -OpKernel *getMaxPool2D() -{ - static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D}; - return &kernel; -} - -} // namespace interp -} // 
namespace onert diff --git a/runtime/onert/core/src/interp/operations/AvgPool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc index 42fe42301..92f9d70b2 100644 --- a/runtime/onert/core/src/interp/operations/AvgPool2D.cc +++ b/runtime/onert/core/src/interp/operations/Pool2D.cc @@ -15,11 +15,12 @@ */ #include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> #include "OperationUtil.h" #include "interp/Registration.h" -#include "ir/operation/AvgPool2D.h" +#include "ir/operation/Pool2D.h" #include "util/Utils.h" #include "util/ShapeInference.h" #include "misc/polymorphic_downcast.h" @@ -28,12 +29,13 @@ namespace onert { namespace interp { -namespace avgpool2d +namespace pool2d { -void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) +void preparePool2D(ExecEnv *env, const ir::Operation &node) { - const auto in_index = node.getInputs().at(0); + const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node); + const auto in_index = node.getInputs().at(pool_node.INPUT); const auto out_index = node.getOutputs().at(0); const auto in_tensor = env->tensorAt(in_index); @@ -45,10 +47,8 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) if (output_info.total_size() == 0) { // Handle unspecified output shape - const auto &avgpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node); const auto infered_output_shape = - shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param()); + shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param()); env->allocateIfNeeded( out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); } @@ -65,18 +65,44 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) assert(out_tensor->num_dimensions() == 4); } -void invoke(const ITensor *in_tensor, const ITensor *out_tensor, - const ir::operation::AvgPool2D::Param &param) +template <typename T> +void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape, + const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr, + ir::operation::Pool2D::PoolType op_type) { - // TODO Support NCHW frontend + switch (op_type) + { + case ir::operation::Pool2D::PoolType::AVG: + nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr); + break; + case ir::operation::Pool2D::PoolType::MAX: + nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr); + break; + default: + throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"}; + break; + } +} + +void invokePool2DOps(const ExecEnv *env, const ir::Operation &node) +{ + const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + // Get input and output tensors + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + // TODO support NCHW frontend const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto param = pool_node.param(); const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh); // Calculate nnfw::cker::PoolParams cker_param; - calculateActivationRange(param.activation, &cker_param.float_activation_min, - &cker_param.float_activation_max); 
cker_param.filter_width = param.kw; cker_param.filter_height = param.kh; cker_param.padding_values.width = padding.left; @@ -84,41 +110,29 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor, cker_param.stride_width = param.stride.horizontal; cker_param.stride_height = param.stride.vertical; - const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); - float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); - - nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr); -} - -void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node) -{ - const auto &avgpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node); - - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - // Check lhs shape is same with rhs (with broadcast) - const auto in_tensor = env->tensorAt(in_index); - const auto out_tensor = env->tensorAt(out_index); - const auto data_type = in_tensor->data_type(); if (data_type == ir::DataType::FLOAT32) { - invoke(in_tensor, out_tensor, avgpool_node.param()); + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + // Now, invoke() supports only Pool2D in float + invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type); } else { throw std::runtime_error{"NYI: Support float only"}; } } -} // namespace avgpool2d +} // namespace pool2d -OpKernel *getAvgPool2D() +OpKernel *getPool2D() { - static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D}; + static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps}; return &kernel; } diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc index 6d9359e1e..d30f78deb 100644 --- a/runtime/onert/core/src/interp/operations/Softmax.cc +++ b/runtime/onert/core/src/interp/operations/Softmax.cc @@ -29,43 +29,6 @@ namespace interp namespace { -void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta, - float *out) -{ - assert(input_size > 0); - - // For each batch - for (int b = 0; b < batch_size; b++) - { - // Find the max coeff. - float max_coeff = in[0]; - for (int i = 1; i < input_size; i++) - { - if (in[i] > max_coeff) - max_coeff = in[i]; - } - - // Compute the normalized sum of exps. - float exp_sum = 0.0; - for (int i = 0; i < input_size; i++) - { - out[i] = std::exp((in[i] - max_coeff) * beta); - exp_sum += out[i]; - } - - // Divide by the sum of exps. - float reciprocal_sum_exp = 1.f / exp_sum; - for (int i = 0; i < input_size; i++) - { - out[i] *= reciprocal_sum_exp; - } - - // Advance in and out pointers for the next batch. 
- in += input_size; - out += input_size; - } -} - void prepareSoftMax(ExecEnv *env, const ir::Operation &node) { const auto in_index = node.getInputs().at(0); @@ -108,7 +71,7 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor, uint32_t batch_size = in_tensor->dimension(0); uint32_t input_size = in_tensor->dimension(1); - Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr); + nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr); } else if (in_tensor->num_dimensions() == 4) { diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc index 0db9b6133..fe8b1b443 100644 --- a/runtime/onert/core/src/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.cc @@ -56,18 +56,34 @@ void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data) _operands.at(ind).data(std::move(data)); } -void Graph::addInput(const OperandIndex &ind) +void Graph::addInput(const OperandIndex &ind, const std::string &name) { assert(isBuildingPhase()); + if (!name.empty()) + _name_to_input.emplace(name, IOIndex{_inputs.size()}); _inputs.append(ind); } -void Graph::addOutput(const OperandIndex &ind) +void Graph::addOutput(const OperandIndex &ind, const std::string &name) { assert(isBuildingPhase()); + if (!name.empty()) + _name_to_output.emplace(name, IOIndex{_outputs.size()}); _outputs.append(ind); } +IOIndex Graph::getInputIndex(const std::string &name) const +{ + auto itr = _name_to_input.find(name); + return (itr == _name_to_input.end()) ? IOIndex{} : itr->second; +} + +IOIndex Graph::getOutputIndex(const std::string &name) const +{ + auto itr = _name_to_output.find(name); + return (itr == _name_to_output.end()) ? IOIndex{} : itr->second; +} + void Graph::finishBuilding(void) { assert(isBuildingPhase()); diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc index 2b29a9ea9..4bea1a55d 100644 --- a/runtime/onert/core/src/ir/GraphIterator.cc +++ b/runtime/onert/core/src/ir/GraphIterator.cc @@ -17,7 +17,7 @@ #include "GraphIterator.h" #include "ir/OperationIndexMap.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" namespace onert { diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h index 534ffef80..b54314e0e 100644 --- a/runtime/onert/core/src/ir/GraphIterator.h +++ b/runtime/onert/core/src/ir/GraphIterator.h @@ -23,12 +23,19 @@ namespace onert { +namespace compiler +{ +class LoweredGraph; +} // namespace compiler +} // namespace onert + +namespace onert +{ namespace ir { class Graph; class Operation; -class LoweredGraph; class OpSequence; template <bool is_const> class Iterator @@ -65,7 +72,8 @@ public: using NodeRef = typename Iterator<is_const>::NodeRef; using IterFn = typename Iterator<is_const>::IterFn; using LoweredGraphRef = - typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type; + typename std::conditional<is_const, const typename compiler::LoweredGraph &, + typename compiler::LoweredGraph &>::type; using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type; using OpSeqIndexRef = const OpSequenceIndex &; using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>; diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc index a87d31a9f..68884783e 100644 --- a/runtime/onert/core/src/ir/OpSequences.cc +++ b/runtime/onert/core/src/ir/OpSequences.cc @@ -83,15 +83,6 @@ OpSequenceIndex OpSequences::getOperation(const 
OperationIndex &operation_index) return ret; } -// TODO: Extract this into external helper function -void OpSequences::dump(const std::string &msg, const Operations &operations) const -{ - VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl; - iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) { - VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl; - }); -} - void OpSequences::removeFromOpSequence(const OperationIndex &operation_index) { const auto op_seq_index = findOperation(operation_index); @@ -122,5 +113,12 @@ OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index throw std::runtime_error("Operation not found"); } +void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations) +{ + op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) { + VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl; + }); +} + } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index e3cbce57a..48361f464 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -27,206 +27,137 @@ namespace ir using namespace operation; -OperationDumper::OperationDumper(const std::string &start_msg) +namespace { - VERBOSE(LIR) << start_msg << std::endl; -} - -void OperationDumper::visit(const Abs &node) +void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "") { - VERBOSE(LIR) << "* Abs" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT) << ")" + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Add &node) +void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "") { - VERBOSE(LIR) << "* Add" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS) << ", " - << node.getInputs().at(Add::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const ArgMax &node) -{ - VERBOSE(LIR) << "* ArgMax" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT) << ")" - << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1) + << ") " << adding_input << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const AvgPool2D &node) +void dumpConvOp(const Operation &node, const std::string &padding_type) { - VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT) << ")" - << std::endl; + VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel(" + << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias(" + << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl; VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void 
OperationDumper::visit(const BatchToSpaceND &node) +void dumpPackingOp(const Operation &node) { - VERBOSE(LIR) << "* BatchToSpaceND" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BatchToSpaceND::Input::INPUT) << ")" - << " BlockSize(" << node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE) << ")" - << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; + std::string inputs; + for (auto i : node.getInputs()) + { + inputs += std::to_string(i.value()) + ","; + } + VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } +} // namespace -void OperationDumper::visit(const operation::BroadcastTo &node) +OperationDumper::OperationDumper(const std::string &start_msg) { - VERBOSE(LIR) << "* BroadcastTo" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BroadcastTo::Input::INPUT) << ", " - << node.getInputs().at(BroadcastTo::Input::SHAPE) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + VERBOSE(LIR) << start_msg << std::endl; } -void OperationDumper::visit(const Cast &node) -{ - VERBOSE(LIR) << "* Cast" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Comparison &node) +void OperationDumper::visit(const BatchToSpaceND &node) { - VERBOSE(LIR) << "* Comparison" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0) << ", " - << node.getInputs().at(Comparison::Input::INPUT1) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string block_size = + "BlockSize(" + + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")"; + dumpUnaryInputOp(node, block_size); } -void OperationDumper::visit(const Concat &node) -{ - VERBOSE(LIR) << "* Concat" << std::endl; - std::string inputs; - for (auto i : node.getInputs()) - { - inputs += std::to_string(i.value()) + ","; - } - VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); } void OperationDumper::visit(const Conv2D &node) { std::string padding_type = node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; - VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel(" - << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias(" - << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; + dumpConvOp(node, padding_type); } -void OperationDumper::visit(const ConvertFp16ToFp32 &node) -{ - VERBOSE(LIR) << "* ConvertFp16ToFp32" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp16ToFp32::Input::INPUT) - << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const ConvertFp32ToFp16 &node) -{ - VERBOSE(LIR) << "* ConvertFp32ToFp16" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp32ToFp16::Input::INPUT) - << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Cos &node) -{ - VERBOSE(LIR) << "* Cos" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cos::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const DepthToSpace &node) -{ - VERBOSE(LIR) << "* DepthToSpace" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const DepthwiseConv2D &node) { std::string padding_type = node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; - VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT) - << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL) << ") Bias(" - << node.getInputs().at(DepthwiseConv2D::Input::BIAS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; + dumpConvOp(node, padding_type); } -void OperationDumper::visit(const Dequantize &node) +void OperationDumper::visit(const ElementwiseActivation &node) { - VERBOSE(LIR) << "* Dequantize" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string params; + if (node.param().op_type == ElementwiseActivation::Type::RELU) + { + params = " lower value(" + std::to_string(node.param().alpha) + ") upper value(" + + std::to_string(node.param().beta) + ")"; + } + else if (node.param().op_type == ElementwiseActivation::Type::LEAKY_RELU) + { + params = " alpha value(" + std::to_string(node.param().alpha) + ")"; + } + dumpUnaryInputOp(node, params); } -void OperationDumper::visit(const Div &node) -{ - VERBOSE(LIR) << "* Div" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS) << ", " - << node.getInputs().at(Div::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const EmbeddingLookup &node) { - VERBOSE(LIR) << "* EmbeddingLookup" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : Lookups(" << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS) << ") VALUES(" << node.getInputs().at(EmbeddingLookup::Input::VALUES) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Exp &node) -{ - VERBOSE(LIR) << "* Exp" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const ExpandDims &node) { - VERBOSE(LIR) << "* ExpandDims" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ExpandDims::Input::INPUT) - << ") AXIS(" << node.getInputs().at(ExpandDims::Input::AXIS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Floor &node) -{ - VERBOSE(LIR) << "* Floor" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string axis = + "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")"; + dumpUnaryInputOp(node, axis); } void OperationDumper::visit(const FullyConnected &node) { - VERBOSE(LIR) << "* FullyConnected" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT) - << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT) << 
") Bias(" - << node.getInputs().at(FullyConnected::Input::BIAS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; + std::string inputs = + "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) + + ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")"; + dumpUnaryInputOp(node, inputs); } void OperationDumper::visit(const Gather &node) { - VERBOSE(LIR) << "* Gather" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT) << ") Indices(" - << node.getInputs().at(Gather::Input::INDICES) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string indices = + "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")"; + dumpUnaryInputOp(node, indices); } void OperationDumper::visit(const HashtableLookup &node) @@ -242,36 +173,15 @@ void OperationDumper::visit(const HashtableLookup &node) void OperationDumper::visit(const InstanceNorm &node) { - VERBOSE(LIR) << "* InstanceNorm" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT) - << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA) << ") Beta(" - << node.getInputs().at(InstanceNorm::Input::BETA) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const L2Normalization &node) -{ - VERBOSE(LIR) << "* L2Normalization" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Normalization::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string inputs = + "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + + ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")"; + dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const L2Pool2D &node) -{ - VERBOSE(LIR) << "* L2Pool2D" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const LocalResponseNormalization &node) -{ - VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" - << node.getInputs().at(LocalResponseNormalization::Input::INPUT) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const LSTM &node) { @@ -307,93 +217,12 @@ void OperationDumper::visit(const LSTM &node) << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl; } -void OperationDumper::visit(const Log &node) -{ - VERBOSE(LIR) << "* Log" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Log::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const LogicalAnd &node) -{ - VERBOSE(LIR) << "* LogicalAnd" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0) << ", " - << 
node.getInputs().at(LogicalAnd::Input::INPUT1) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const LogicalNot &node) -{ - VERBOSE(LIR) << "* LogicalNot" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const LogicalOr &node) -{ - VERBOSE(LIR) << "* LogicalOr" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0) << ", " - << node.getInputs().at(LogicalOr::Input::INPUT1) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Logistic &node) -{ - VERBOSE(LIR) << "* Logistic" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const MaxPool2D &node) -{ - std::string padding_type = - node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; - VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Mul &node) -{ - VERBOSE(LIR) << "* Mul" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS) << ", " - << node.getInputs().at(Mul::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Neg &node) -{ - VERBOSE(LIR) << "* Neg" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Pack &node) -{ - VERBOSE(LIR) << "* Pack" << std::endl; - std::string inputs; - const auto &input_indices = node.getInputs(); - for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) - { - inputs += std::to_string(it->value()); - if (std::next(it) != std::end(input_indices)) - inputs += ", "; - } - VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); } void OperationDumper::visit(const Pad &node) { - VERBOSE(LIR) << "* Pad" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pad::Input::INPUT) << ") Pad(" - << node.getInputs().at(Pad::Input::PAD) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string pad = "Pad(" + std::to_string(node.getInputs().at(Pad::Input::PAD).value()) + ")"; + dumpUnaryInputOp(node, pad); } void OperationDumper::visit(const Permute &node) @@ -417,86 +246,46 @@ void OperationDumper::visit(const Permute &node) VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Pow &node) +void OperationDumper::visit(const Pool2D &node) { - VERBOSE(LIR) 
<< "* Pow" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pow::Input::LHS) << ", " - << node.getInputs().at(Pow::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const PReLU &node) -{ - VERBOSE(LIR) << "* PReLU" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT) << ") Alpha(" - << node.getInputs().at(PReLU::Input::ALPHA) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Reduce &node) -{ - VERBOSE(LIR) << "* " + node.name() << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reduce::Input::INPUT) << ")" + std::string padding_type = + node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const ReLU &node) -{ - VERBOSE(LIR) << "* ReLU" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); } -void OperationDumper::visit(const ReLU1 &node) +void OperationDumper::visit(const PReLU &node) { - VERBOSE(LIR) << "* ReLU1" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string alpha = + "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")"; + dumpUnaryInputOp(node, alpha); } -void OperationDumper::visit(const ReLU6 &node) -{ - VERBOSE(LIR) << "* ReLU6" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); } + +void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Reshape &node) { - VERBOSE(LIR) << "* Reshape" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT) << ")"; // optional param - if (node.getInputs().size() == 2) - { - VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE) << ")"; - } - else - { - VERBOSE(LIR) << " Shape(not provided)"; - } - VERBOSE(LIR) << std::endl; - - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string shape = + node.getInputs().size() == 2 + ? 
"Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")" + : "Shape(not provided)"; + dumpUnaryInputOp(node, shape); } -void OperationDumper::visit(const ResizeBilinear &node) -{ - VERBOSE(LIR) << "* ResizeBilinear" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Reverse &node) { - VERBOSE(LIR) << "* Reverse" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reverse::Input::INPUT) << ") Axis(" - << node.getInputs().at(Reverse::Input::AXIS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string axis = + "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")"; + dumpUnaryInputOp(node, axis); } void OperationDumper::visit(const RNN &node) @@ -512,162 +301,65 @@ void OperationDumper::visit(const RNN &node) << std::endl; } -void OperationDumper::visit(const Round &node) -{ - VERBOSE(LIR) << "* Round" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Round::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const Range &node) { VERBOSE(LIR) << "* Range" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Range::Input::START) << ")" + VERBOSE(LIR) << " - Inputs : Start(" << node.getInputs().at(Range::Input::START) << ")" << " Limit(" << node.getInputs().at(Range::Input::LIMIT) << ")" << " Delta(" << node.getInputs().at(Range::Input::DELTA) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const RSQRT &node) -{ - VERBOSE(LIR) << "* RSQRT" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const Select &node) { VERBOSE(LIR) << "* Select" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Select::Input::CONDITION) << ")" + VERBOSE(LIR) << " - Inputs : Condition(" << node.getInputs().at(Select::Input::CONDITION) << ")" << " Input_X(" << node.getInputs().at(Select::Input::INPUT_TRUE) << ")" << " Input_Y(" << node.getInputs().at(Select::Input::INPUT_FALSE) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const ir::operation::Shape &node) -{ - VERBOSE(LIR) << "* Shape" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ir::operation::Shape::Input::INPUT) - << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Sin &node) -{ - VERBOSE(LIR) << "* Sin" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sin::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Softmax &node) -{ - VERBOSE(LIR) << "* 
Softmax" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const SpaceToBatchND &node) { - VERBOSE(LIR) << "* SpaceToBatchND" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToBatchND::Input::INPUT) - << ") BlockSize(" << node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE) - << ") Paddings(" << node.getInputs().at(SpaceToBatchND::Input::PADDINGS) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string inputs = + "BlockSize(" + + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) + + ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) + + ")"; + dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const SpaceToDepth &node) -{ - VERBOSE(LIR) << "* SpaceToDepth" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Split &node) -{ - VERBOSE(LIR) << "* Split" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const SQRT &node) -{ - VERBOSE(LIR) << "* SQRT" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const SquaredDifference &node) -{ - VERBOSE(LIR) << "* SquaredDifference" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SquaredDifference::Input::LHS) - << ", " << node.getInputs().at(SquaredDifference::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); } void OperationDumper::visit(const StatelessRandomUniform &node) { VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE) - << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Squeeze &node) -{ - VERBOSE(LIR) << "* Squeeze" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Slice &node) -{ - VERBOSE(LIR) << "* Slice" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT) << ")" + VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE) + << " 
Seed(" << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const StridedSlice &node) -{ - VERBOSE(LIR) << "* StridedSlice" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Sub &node) -{ - VERBOSE(LIR) << "* Sub" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS) << ", " - << node.getInputs().at(Sub::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Tanh &node) -{ - VERBOSE(LIR) << "* TanH" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Tile &node) { - VERBOSE(LIR) << "* Tile" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tile::Input::INPUT) << ", " - << node.getInputs().at(Tile::Input::MULTIPLES) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string multiples = + "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")"; + dumpUnaryInputOp(node, multiples); } void OperationDumper::visit(const TopKV2 &node) @@ -692,17 +384,11 @@ void OperationDumper::visit(const TransposeConv &node) VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Transpose &node) -{ - VERBOSE(LIR) << "* Transpose" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Unpack &node) { - VERBOSE(LIR) << "* Unpack" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")" << std::endl; std::string outputs; @@ -716,25 +402,9 @@ void OperationDumper::visit(const Unpack &node) VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl; } -void OperationDumper::visit(const Min &node) -{ - VERBOSE(LIR) << "* Min" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS) << ", " - << node.getInputs().at(Min::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Max &node) -{ - VERBOSE(LIR) << "* Max" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS) << ", " - << node.getInputs().at(Max::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const OneHot &node) { - 
VERBOSE(LIR) << "* OneHot" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : " << "Indices(" << node.getInputs().at(OneHot::Input::INDICES) << ") " << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; @@ -742,7 +412,7 @@ void OperationDumper::visit(const OneHot &node) void OperationDumper::visit(const If &node) { - VERBOSE(LIR) << "* If" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; std::string inputs; const auto &input_indices = node.getInputs(); for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) @@ -767,7 +437,7 @@ void OperationDumper::visit(const If &node) void OperationDumper::visit(const While &node) { - VERBOSE(LIR) << "* While" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; std::string inputs; const auto &input_indices = node.getInputs(); for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) @@ -790,13 +460,5 @@ void OperationDumper::visit(const While &node) VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl; } -void OperationDumper::visit(const ZerosLike &node) -{ - VERBOSE(LIR) << "* RoZerosLike" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ZerosLike::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h index d83f1493f..e8ab3b3cd 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -31,85 +31,61 @@ public: OperationDumper(const std::string &start_msg); public: - void visit(const operation::Abs &) override; - void visit(const operation::Add &node) override; void visit(const operation::ArgMax &) override; - void visit(const operation::AvgPool2D &node) override; void visit(const operation::BatchToSpaceND &node) override; + void visit(const operation::BinaryArithmetic &node) override; void visit(const operation::BroadcastTo &) override; - void visit(const operation::Cast &) override; void visit(const operation::Comparison &) override; void visit(const operation::Concat &node) override; void visit(const operation::Conv2D &node) override; void visit(const operation::ConvertFp16ToFp32 &node) override; void visit(const operation::ConvertFp32ToFp16 &node) override; - void visit(const operation::Cos &node) override; void visit(const operation::DepthToSpace &) override; void visit(const operation::DepthwiseConv2D &node) override; - void visit(const operation::Dequantize &) override; - void visit(const operation::Div &) override; + void visit(const operation::ElementwiseActivation &) override; + void visit(const operation::ElementwiseBinary &) override; + void visit(const operation::ElementwiseUnary &) override; void visit(const operation::EmbeddingLookup &) override; - void visit(const operation::Exp &) override; void visit(const operation::ExpandDims &) override; - void visit(const operation::Floor &) override; void visit(const operation::FullyConnected &node) override; void visit(const operation::Gather &) override; void visit(const operation::HashtableLookup &) override; void visit(const operation::InstanceNorm &) override; void visit(const operation::L2Normalization &) override; - void visit(const operation::L2Pool2D &) override; void visit(const operation::LocalResponseNormalization &) 
override; - void visit(const operation::Log &) override; - void visit(const operation::LogicalAnd &) override; - void visit(const operation::LogicalNot &) override; - void visit(const operation::LogicalOr &) override; - void visit(const operation::Logistic &) override; void visit(const operation::LSTM &) override; - void visit(const operation::MaxPool2D &node) override; - void visit(const operation::Mul &) override; - void visit(const operation::Neg &) override; void visit(const operation::Pack &) override; void visit(const operation::Pad &) override; void visit(const operation::Permute &node) override; + void visit(const operation::Pool2D &node) override; void visit(const operation::Pow &node) override; void visit(const operation::PReLU &) override; void visit(const operation::Range &) override; + void visit(const operation::Rank &) override; void visit(const operation::Reduce &) override; - void visit(const operation::ReLU &) override; - void visit(const operation::ReLU1 &) override; - void visit(const operation::ReLU6 &) override; void visit(const operation::Reshape &node) override; void visit(const operation::ResizeBilinear &) override; void visit(const operation::Reverse &) override; void visit(const operation::RNN &) override; - void visit(const operation::Round &) override; - void visit(const operation::RSQRT &) override; void visit(const operation::Select &node) override; void visit(const operation::Shape &node) override; - void visit(const operation::Sin &node) override; void visit(const operation::Softmax &node) override; void visit(const operation::SpaceToBatchND &) override; void visit(const operation::SpaceToDepth &) override; void visit(const operation::Split &) override; - void visit(const operation::SQRT &) override; void visit(const operation::SquaredDifference &) override; void visit(const operation::Squeeze &) override; void visit(const operation::Slice &) override; void visit(const operation::StridedSlice &) override; void visit(const operation::StatelessRandomUniform &) override; - void visit(const operation::Sub &) override; - void visit(const operation::Tanh &) override; void visit(const operation::Tile &) override; void visit(const operation::TopKV2 &) override; void visit(const operation::TransposeConv &) override; void visit(const operation::Transpose &) override; void visit(const operation::Unpack &) override; - void visit(const operation::Min &) override; - void visit(const operation::Max &) override; void visit(const operation::OneHot &) override; void visit(const operation::If &) override; void visit(const operation::While &) override; - void visit(const operation::ZerosLike &) override; }; } // namespace ir diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc index 31969911f..d74f80217 100644 --- a/runtime/onert/core/src/ir/Padding.cc +++ b/runtime/onert/core/src/ir/Padding.cc @@ -50,7 +50,7 @@ inline ExplicitPadding validPadding(void) } inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride, - uint32_t kw, uint32_t kh) + uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf) { ExplicitPadding padding; @@ -61,14 +61,19 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const // padding_to_beginning = total_padding / 2 // padding_to_end = (total_padding + 1)/2. 
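+  // Worked example of the formula just below (hypothetical numbers): a kernel
+  // of kh = 3 with dilation dhf = 2 touches input rows {0, 2, 4}, i.e. an
+  // effective extent of (3 - 1) * 2 + 1 = 5, so SAME padding is derived from
+  // 5 rather than from the raw kernel size 3.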
// + const int32_t effective_filter_h_size = (kh - 1) * dhf + 1; + const int32_t effective_filter_w_size = (kw - 1) * dwf + 1; + const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical; const int32_t horizontal_expected_output = (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; - const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh; + const int32_t vertical_needed_input = + (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size; const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H); - const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw; + const int32_t horizontal_needed_input = + (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size; const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W); padding.top = vertical_total_padding / 2; @@ -80,7 +85,8 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const } inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, - const Stride &stride, uint32_t kw, uint32_t kh) + const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf, + uint32_t dhf) { const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical; const int32_t horizontal_expected_output = @@ -92,7 +98,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS UNUSED_RELEASE(vertical_expected_output); UNUSED_RELEASE(horizontal_expected_output); - return samePaddingUsingIFM(ifm_shape, stride, kw, kh); + return samePaddingUsingIFM(ifm_shape, stride, kw, kh, dwf, dhf); } } // namespace @@ -130,7 +136,7 @@ Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom) const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, const Stride &stride, - uint32_t kw, uint32_t kh) + uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf) { if (padding.type == PaddingType::EXPLICIT) { @@ -138,7 +144,7 @@ const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShap } else if (padding.type == PaddingType::SAME) { - return samePadding(ifm_shape, ofm_shape, stride, kw, kh); + return samePadding(ifm_shape, ofm_shape, stride, kw, kh, dwf, dhf); } else if (padding.type == PaddingType::VALID) { diff --git a/runtime/onert/core/src/ir/operation/Abs.cc b/runtime/onert/core/src/ir/operation/Abs.cc deleted file mode 100644 index b06705d07..000000000 --- a/runtime/onert/core/src/ir/operation/Abs.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ir/operation/Abs.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Abs::accept(OperationVisitor &v) const { v.visit(*this); } - -Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Add.cc b/runtime/onert/core/src/ir/operation/Add.cc deleted file mode 100644 index 2fa30f8ed..000000000 --- a/runtime/onert/core/src/ir/operation/Add.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Add.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Add::accept(OperationVisitor &v) const { v.visit(*this); } - -Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/AvgPool2D.cc b/runtime/onert/core/src/ir/operation/AvgPool2D.cc deleted file mode 100644 index 28d4fcb54..000000000 --- a/runtime/onert/core/src/ir/operation/AvgPool2D.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/AvgPool2D.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); } - -AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Quantize.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc index 0e3d5b69b..2b1422c73 100644 --- a/runtime/onert/core/src/ir/operation/Quantize.cc +++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc @@ -14,7 +14,10 @@ * limitations under the License. 
*/ -#include "ir/operation/Quantize.h" +#include "ir/operation/BinaryArithmetic.h" + +#include <cassert> +#include <unordered_map> #include "ir/OperationVisitor.h" @@ -25,11 +28,23 @@ namespace ir namespace operation { -void Quantize::accept(OperationVisitor &v) const { v.visit(*this); } +void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); } + +BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} -Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} +std::string BinaryArithmetic::name() const { + using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType; + static const std::unordered_map<ArithmeticType, std::string> name_map{ + {ArithmeticType::ADD, std::string{"Add"}}, + {ArithmeticType::SUB, std::string{"Sub"}}, + {ArithmeticType::MUL, std::string{"Mul"}}, + {ArithmeticType::DIV, std::string{"Div"}}}; + return name_map.at(_param.arithmetic_type); } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/Cast.cc b/runtime/onert/core/src/ir/operation/Cast.cc deleted file mode 100644 index 09d9c327e..000000000 --- a/runtime/onert/core/src/ir/operation/Cast.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Cast.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Cast::accept(OperationVisitor &v) const { v.visit(*this); } - -Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Dequantize.cc b/runtime/onert/core/src/ir/operation/Dequantize.cc deleted file mode 100644 index 14d6362bd..000000000 --- a/runtime/onert/core/src/ir/operation/Dequantize.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ir/operation/Dequantize.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); } - -Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Div.cc b/runtime/onert/core/src/ir/operation/Div.cc deleted file mode 100644 index b095d9811..000000000 --- a/runtime/onert/core/src/ir/operation/Div.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Div.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Div::accept(OperationVisitor &v) const { v.visit(*this); } - -Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc new file mode 100644 index 000000000..f6718b656 --- /dev/null +++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/ElementwiseActivation.h" + +#include <cassert> +#include <unordered_map> + +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); } + +ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ + if (param.op_type == Type::LOGISTIC) + { + assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as " + "sigmoid function(L=1, k=1, x0=0). 
So alpha and beta must not be used");
+  }
+  else if (param.op_type == Type::RELU)
+  {
+    assert(param.alpha >= param.beta && "ReLU's alpha must be greater than or equal to beta");
+  }
+  else if (param.op_type == Type::TANH)
+  {
+    assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x); Tanh is "
+                                                        "supported only when alpha and "
+                                                        "beta are 1.0f");
+  }
+}
+
+std::string ElementwiseActivation::name() const
+{
+  using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
+  static const std::unordered_map<Type, std::string> name_map{
+      {ElementwiseActivationType::ELU, "ELU"},
+      {ElementwiseActivationType::LOGISTIC, "Logistic"},
+      {ElementwiseActivationType::RELU, "ReLU"},
+      {ElementwiseActivationType::TANH, "Tanh"},
+      {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+  return name_map.at(_param.op_type);
+}
+
+float ElementwiseActivation::infinity = std::numeric_limits<float>::infinity();
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
new file mode 100644
index 000000000..3287fc0a3
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseBinary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
+                                     const OperandIndexSequence &outputs, const Param &param)
+    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseBinary::name() const
+{
+  using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
+  static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
+      {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+      {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+      {ElementwiseBinaryType::MAX, std::string{"Max"}},
+      {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+  return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
new file mode 100644
index 000000000..7dfcd4a98
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/ElementwiseUnary.h" + +#include <cassert> +#include <unordered_map> + +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); } + +ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +std::string ElementwiseUnary::name() const +{ + using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type; + static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{ + {ElementwiseUnaryType::ABS, std::string{"Abs"}}, + {ElementwiseUnaryType::CAST, std::string{"Cast"}}, + {ElementwiseUnaryType::COS, std::string{"Cos"}}, + {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}}, + {ElementwiseUnaryType::ERF, std::string{"Erf"}}, + {ElementwiseUnaryType::EXP, std::string{"Exp"}}, + {ElementwiseUnaryType::FLOOR, std::string{"Floor"}}, + {ElementwiseUnaryType::LOG, std::string{"Log"}}, + {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}}, + {ElementwiseUnaryType::NEG, std::string{"Neg"}}, + {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}}, + {ElementwiseUnaryType::ROUND, std::string{"Round"}}, + {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, + {ElementwiseUnaryType::SIN, std::string{"Sin"}}, + {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, + {ElementwiseUnaryType::SQURE, std::string{"Squre"}}, + {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; + return name_map.at(_param.op_type); +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Exp.cc b/runtime/onert/core/src/ir/operation/Exp.cc deleted file mode 100644 index 0b22e080a..000000000 --- a/runtime/onert/core/src/ir/operation/Exp.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ir/operation/Exp.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Exp::accept(OperationVisitor &v) const { v.visit(*this); } - -Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Floor.cc b/runtime/onert/core/src/ir/operation/Floor.cc deleted file mode 100644 index dc01535ad..000000000 --- a/runtime/onert/core/src/ir/operation/Floor.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Floor.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Floor::accept(OperationVisitor &v) const { v.visit(*this); } - -Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/L2Pool2D.cc b/runtime/onert/core/src/ir/operation/L2Pool2D.cc deleted file mode 100644 index 8f21b93e0..000000000 --- a/runtime/onert/core/src/ir/operation/L2Pool2D.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/L2Pool2D.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } - -L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogicalAnd.cc b/runtime/onert/core/src/ir/operation/LogicalAnd.cc deleted file mode 100644 index 0d50706ca..000000000 --- a/runtime/onert/core/src/ir/operation/LogicalAnd.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/LogicalAnd.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); } - -LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogicalNot.cc b/runtime/onert/core/src/ir/operation/LogicalNot.cc deleted file mode 100644 index 8f1142102..000000000 --- a/runtime/onert/core/src/ir/operation/LogicalNot.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/LogicalNot.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); } - -LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogicalOr.cc b/runtime/onert/core/src/ir/operation/LogicalOr.cc deleted file mode 100644 index d75207c4a..000000000 --- a/runtime/onert/core/src/ir/operation/LogicalOr.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
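The two-input logical ops being deleted here (LogicalAnd, LogicalOr) are re-expressed through the generic ElementwiseBinary op that the loadElementwiseBinary template further down in base_loader.h instantiates, while LogicalNot maps to ElementwiseUnary::Type::LOGICAL_NOT. A hedged sketch; the exact ElementwiseBinaryType enumerator name is assumed, since only the enum's type name appears in this diff:

  ir::operation::ElementwiseBinary::Param param;
  param.op_type = ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND; // enumerator name assumed
  std::unique_ptr<ir::Operation> new_op(new ir::operation::ElementwiseBinary(inputs, outputs, param));
  subg.addOperation(std::move(new_op));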
- */ - -#include "ir/operation/LogicalOr.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); } - -LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Logistic.cc b/runtime/onert/core/src/ir/operation/Logistic.cc deleted file mode 100644 index 77d9d17de..000000000 --- a/runtime/onert/core/src/ir/operation/Logistic.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Logistic.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Logistic::accept(OperationVisitor &v) const { v.visit(*this); } - -Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Max.cc b/runtime/onert/core/src/ir/operation/Max.cc deleted file mode 100644 index 281f9d451..000000000 --- a/runtime/onert/core/src/ir/operation/Max.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Max.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Max::accept(OperationVisitor &v) const { v.visit(*this); } - -Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/MaxPool2D.cc b/runtime/onert/core/src/ir/operation/MaxPool2D.cc deleted file mode 100644 index eac53cc5e..000000000 --- a/runtime/onert/core/src/ir/operation/MaxPool2D.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/MaxPool2D.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); } - -MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Min.cc b/runtime/onert/core/src/ir/operation/Min.cc deleted file mode 100644 index 8be7f0cc8..000000000 --- a/runtime/onert/core/src/ir/operation/Min.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Min.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Min::accept(OperationVisitor &v) const { v.visit(*this); } - -Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Mul.cc b/runtime/onert/core/src/ir/operation/Mul.cc deleted file mode 100644 index 03cdf1b61..000000000 --- a/runtime/onert/core/src/ir/operation/Mul.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#include "ir/operation/Mul.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Mul::accept(OperationVisitor &v) const { v.visit(*this); } - -Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Neg.cc b/runtime/onert/core/src/ir/operation/Neg.cc deleted file mode 100644 index df623a13b..000000000 --- a/runtime/onert/core/src/ir/operation/Neg.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Neg.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Neg::accept(OperationVisitor &v) const { v.visit(*this); } - -Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc index aecc2d994..0c56e92e3 100644 --- a/runtime/onert/core/src/ir/operation/Pad.cc +++ b/runtime/onert/core/src/ir/operation/Pad.cc @@ -27,8 +27,10 @@ namespace operation void Pad::accept(OperationVisitor &v) const { v.visit(*this); } +// PAD: 2 inputs +// PADV2: 3 inputs Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Sin.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc index 631505f36..761d14c3d 100644 --- a/runtime/onert/core/src/ir/operation/Sin.cc +++ b/runtime/onert/core/src/ir/operation/Pool2D.cc @@ -14,9 +14,10 @@ * limitations under the License.
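The Pad change just above relaxes the operand count from exactly two to a 2-3 range, so one IR node now covers both TFLite PAD (input, paddings) and PADV2 (input, paddings, constant pad value). Roughly, with hypothetical operand indices:

  // Two inputs model PAD; adding the pad-value operand models PADV2.
  onert::ir::operation::Pad pad_v2{onert::ir::OperandIndexSequence{input, paddings, pad_value},
                                   onert::ir::OperandIndexSequence{output}};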
*/ -#include "ir/operation/Sin.h" +#include "ir/operation/Pool2D.h" #include <cassert> +#include <unordered_map> #include "ir/OperationVisitor.h" @@ -27,13 +28,24 @@ namespace ir namespace operation { -void Sin::accept(OperationVisitor &v) const { v.visit(*this); } +void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } -Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} +Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } +std::string Pool2D::name() const +{ + using PoolType = onert::ir::operation::Pool2D::PoolType; + static const std::unordered_map<PoolType, std::string> name_map{ + {PoolType::AVG, "Avg" + std::string{toString(opcode())}}, + {PoolType::L2, "L2" + std::string{toString(opcode())}}, + {PoolType::MAX, "Max" + std::string{toString(opcode())}}}; + return name_map.at(_param.op_type); +} + } // namespace operation } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/operation/RSQRT.cc b/runtime/onert/core/src/ir/operation/RSQRT.cc deleted file mode 100644 index 2bce1fa28..000000000 --- a/runtime/onert/core/src/ir/operation/RSQRT.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/RSQRT.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); } - -RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Cos.cc b/runtime/onert/core/src/ir/operation/Rank.cc index 831a92dbd..c357e9018 100644 --- a/runtime/onert/core/src/ir/operation/Cos.cc +++ b/runtime/onert/core/src/ir/operation/Rank.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/operation/Cos.h" +#include "ir/operation/Rank.h" #include <cassert> @@ -27,9 +27,9 @@ namespace ir namespace operation { -void Cos::accept(OperationVisitor &v) const { v.visit(*this); } +void Rank::accept(OperationVisitor &v) const { v.visit(*this); } -Cos::Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) +Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/ReLU.cc b/runtime/onert/core/src/ir/operation/ReLU.cc deleted file mode 100644 index f0c88478b..000000000 --- a/runtime/onert/core/src/ir/operation/ReLU.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd.
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/ReLU.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ReLU::accept(OperationVisitor &v) const { v.visit(*this); } - -ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ReLU1.cc b/runtime/onert/core/src/ir/operation/ReLU1.cc deleted file mode 100644 index 734f0b65b..000000000 --- a/runtime/onert/core/src/ir/operation/ReLU1.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/ReLU1.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); } - -ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ReLU6.cc b/runtime/onert/core/src/ir/operation/ReLU6.cc deleted file mode 100644 index 5972329af..000000000 --- a/runtime/onert/core/src/ir/operation/ReLU6.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
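ReLU and ReLU1 above, and ReLU6 below, lose their dedicated classes; they funnel into the parameterized ElementwiseActivation op whose loader, loadElementwiseActivation, is declared later in base_loader.h with alpha/beta arguments. One plausible encoding for ReLU6, assuming alpha/beta act as upper/lower clamp bounds (the enumerator name and the clamp convention are assumptions, not shown in this diff):

  ir::operation::ElementwiseActivation::Param param;
  param.op_type = ir::operation::ElementwiseActivation::Type::RELU; // enumerator name assumed
  param.alpha = 6.0f; // assumed upper bound, giving ReLU6 semantics
  param.beta = 0.0f;  // assumed lower bound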
- */ - -#include "ir/operation/ReLU6.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); } - -ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Round.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc index 16dfb2b1b..9f17af97c 100644 --- a/runtime/onert/core/src/ir/operation/Round.cc +++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/operation/Round.h" +#include "ir/operation/ResizeNearestNeighbor.h" #include <cassert> @@ -27,10 +27,12 @@ namespace ir namespace operation { -void Round::accept(OperationVisitor &v) const { v.visit(*this); } +void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); } -Round::Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} +ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/SQRT.cc b/runtime/onert/core/src/ir/operation/SQRT.cc deleted file mode 100644 index ad887d89a..000000000 --- a/runtime/onert/core/src/ir/operation/SQRT.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/SQRT.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void SQRT::accept(OperationVisitor &v) const { v.visit(*this); } - -SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Sub.cc b/runtime/onert/core/src/ir/operation/Sub.cc deleted file mode 100644 index d71071686..000000000 --- a/runtime/onert/core/src/ir/operation/Sub.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Sub.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Sub::accept(OperationVisitor &v) const { v.visit(*this); } - -Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Tanh.cc b/runtime/onert/core/src/ir/operation/Tanh.cc deleted file mode 100644 index 8fab0c0f3..000000000 --- a/runtime/onert/core/src/ir/operation/Tanh.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Tanh.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Tanh::accept(OperationVisitor &v) const { v.visit(*this); } - -Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ZerosLike.cc b/runtime/onert/core/src/ir/operation/ZerosLike.cc deleted file mode 100644 index 5f49b98d1..000000000 --- a/runtime/onert/core/src/ir/operation/ZerosLike.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
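Sub above joins Add, Mul and Div in folding into a single BinaryArithmetic op that carries an ArithmeticType plus the fused activation, as the loadBinaryArithmetic template later in this commit shows. A former Sub with no fused activation would now be built roughly as:

  ir::operation::BinaryArithmetic::Param param;
  param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::SUB;
  param.activation = ir::Activation::NONE;
  std::unique_ptr<ir::Operation> new_op(new ir::operation::BinaryArithmetic(inputs, outputs, param));
  subg.addOperation(std::move(new_op));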
- */ - -#include "ir/operation/ZerosLike.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ZerosLike::accept(OperationVisitor &v) const { v.visit(*this); } - -ZerosLike::ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h deleted file mode 100644 index 6dec9ea8f..000000000 --- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__ -#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__ - -#include "ir/OperationVisitor.h" -#include "LoweredOperationPass.h" - -namespace onert -{ -namespace ir -{ -namespace pass -{ - -class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor -{ -public: - using LoweredOperationPass::LoweredOperationPass; - -public: - std::string id() final { return "PermutationOperationPass"; } - -public: - void callback(const OperationIndex &i, Operation &n) final; - -public: - void visit(const operation::Add &) final; - void visit(const operation::Comparison &) final; - void visit(const operation::Concat &) final; - void visit(const operation::Div &) final; - void visit(const operation::LogicalAnd &) final; - void visit(const operation::LogicalNot &) final; - void visit(const operation::LogicalOr &) final; - void visit(const operation::Max &) final; - void visit(const operation::Min &) final; - void visit(const operation::Mul &) final; - void visit(const operation::Pack &) final; - void visit(const operation::PReLU &) final; - void visit(const operation::SquaredDifference &) final; - void visit(const operation::Sub &) final; - void visit(const operation::Unpack &) final; - void visit(const operation::FullyConnected &) final; - void visit(const operation::Gather &) final; - void visit(const operation::Reshape &) final; - -private: - void applyExpandRanks(const Operation &); - void changeToKeepLayout(const Operation &); -}; - -} // namespace pass -} // namespace ir -} // namespace onert - -#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc index ec7f92117..13a599bed 100644 --- a/runtime/onert/core/src/util/EventRecorder.cc +++ b/runtime/onert/core/src/util/EventRecorder.cc @@ -21,7 +21,12 @@ #include <unordered_map> #include <json/json.h> #include <assert.h> +#include <utility> +#include <map> +#include <set> +#include <stdint.h> +// json type for Chrome Event Trace namespace { @@ -110,6 +115,290 @@ std::string object(const 
CounterEvent &evt) } // namespace +// md table type +namespace +{ + +void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list) +{ + os << "| "; + for (auto &key : list) + { + os << key << " | "; + } + os << "\n"; +} + +struct MDContent +{ + std::string name; + uint64_t begin_ts; + uint64_t end_ts; + uint32_t min_rss; + uint32_t max_rss; + uint32_t min_page_reclaims; + uint32_t max_page_reclaims; + + MDContent() + : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX), + max_page_reclaims(0) + { + // DO NOTHING + } + + virtual ~MDContent() = default; + + void updateRss(uint32_t rss) + { + if (min_rss == UINT32_MAX) + min_rss = rss; + if (max_rss == 0) + max_rss = rss; + + if (min_rss > rss) + min_rss = rss; + else if (max_rss < rss) + max_rss = rss; + } + + void updateMinflt(uint32_t minflt) + { + if (min_page_reclaims == UINT32_MAX) + min_page_reclaims = minflt; + if (max_page_reclaims == 0) + max_page_reclaims = minflt; + + if (min_page_reclaims > minflt) + min_page_reclaims = minflt; + else if (max_page_reclaims < minflt) + max_page_reclaims = minflt; + } + + virtual void write(std::ostream &os) const = 0; +}; + +struct OpSeq : public MDContent +{ + std::string backend; + uint64_t graph_latency; + + struct OpSeqCmp + { + bool operator()(const OpSeq &lhs, const OpSeq &rhs) const + { + return lhs.begin_ts < rhs.begin_ts; + } + bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; } + bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; } + }; + + void write(std::ostream &os) const override + { + uint64_t opseq_latency = end_ts - begin_ts; + double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0; + writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per), + std::to_string(min_rss), std::to_string(max_rss), + std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)}); + } +}; + +struct Graph : public MDContent +{ + std::set<OpSeq, OpSeq::OpSeqCmp> opseqs; + + void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq) + { + uint64_t graph_latency = end_ts - begin_ts; + for (auto it : name_to_opseq) + { + auto opseq = it.second; + opseq.graph_latency = graph_latency; + + opseqs.insert(opseq); + + updateRss(opseq.min_rss); + updateRss(opseq.max_rss); + updateMinflt(opseq.min_page_reclaims); + updateMinflt(opseq.max_page_reclaims); + } + } + + void write(std::ostream &os) const override + { + static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)", + "page_reclaims_min", "page_reclaims_max"}; + + static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------", + "-----------------", "-----------------"}; + + // Graph's Header + writeMDTableRow(os, graph_headers); + writeMDTableRow(os, graph_headers_line); + + // Graph's contents + writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss), + std::to_string(max_rss), std::to_string(min_page_reclaims), + std::to_string(max_page_reclaims)}); + + os << "\n"; + + static std::vector<std::string> opseq_headers{ + "OpSeq name", "backend", "latency(us)", "latency(%)", + "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"}; + + static std::vector<std::string> opseq_headers_line{ + "----------", "-------", "-----------", "-----------", + "-------", "-------", "-----------------", "-----------------"}; + + os << "## OpSequences \n"; + + // OpSeq's Header + 
writeMDTableRow(os, opseq_headers); + writeMDTableRow(os, opseq_headers_line); + + // OpSeq's contents + for (auto opseq : opseqs) + { + opseq.write(os); + } + + os << "\n"; + } +}; + +struct MDTableBuilder +{ + MDTableBuilder(const std::vector<DurationEvent> &duration_events, + const std::vector<CounterEvent> &counter_events) + : _duration_events(duration_events), _counter_events(counter_events) + { + for (const auto &evt : _counter_events) + { + uint64_t ts = std::stoull(evt.ts); + auto &name = evt.name; + assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0); + assert(evt.values.size() == 1); + auto &val = evt.values.begin()->second; + if (_ts_to_values.find(ts) == _ts_to_values.end()) + { + std::pair<uint32_t, uint32_t> values; + if (name.compare("maxrss") == 0) + values.first = std::stoul(val); + else + values.second = std::stoul(val); + _ts_to_values.insert({ts, values}); + } + else + { + auto &values = _ts_to_values.at(ts); + if (name.compare("maxrss") == 0) + values.first = std::stoul(val); + else + values.second = std::stoul(val); + } + } + } + + MDTableBuilder &build() + { + for (auto &it : divideGraph()) + { + size_t begin_idx = it.first; + size_t end_idx = it.second; + std::map<std::string, OpSeq> name_to_opseq; + for (size_t i = begin_idx + 1; i < end_idx; ++i) + { + const auto &evt = _duration_events[i]; + assert(evt.name.compare("Graph") != 0); + assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0); + if (evt.ph.compare("B") == 0) + { + assert(name_to_opseq.find(evt.name) == name_to_opseq.end()); + name_to_opseq.insert({evt.name, makeOpSeq(evt)}); + } + else + { + assert(name_to_opseq.find(evt.name) != name_to_opseq.end()); + auto &opseq = name_to_opseq.at(evt.name); + updateOpSeq(opseq, evt); + } + } + + _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq)); + } + + return *this; + } + + std::vector<std::pair<size_t, size_t>> divideGraph() + { + std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx> + for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i) + { + const auto &evt = _duration_events.at(i); + if (evt.name.compare("Graph") == 0) + { + if (evt.ph.compare("B") == 0) + begin_idx = i; + else + graph_idx_list.emplace_back(begin_idx, i); + } + } + return graph_idx_list; + } + + OpSeq makeOpSeq(const DurationEvent &evt) + { + OpSeq opseq; + opseq.name = evt.name; + opseq.begin_ts = std::stoull(evt.ts); + opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first); + opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second); + opseq.backend = evt.tid; + return opseq; + } + + void updateOpSeq(OpSeq &opseq, const DurationEvent &evt) + { + opseq.end_ts = std::stoull(evt.ts); + opseq.updateRss(_ts_to_values.at(opseq.end_ts).first); + opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second); + } + + Graph makeGraph(size_t begin_idx, size_t end_idx, + const std::map<std::string, OpSeq> &name_to_opseq) + { + Graph graph; + graph.name = "Graph"; + graph.begin_ts = std::stoull(_duration_events[begin_idx].ts); + graph.updateRss(_ts_to_values.at(graph.begin_ts).first); + graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second); + graph.end_ts = std::stoull(_duration_events[end_idx].ts); + graph.updateRss(_ts_to_values.at(graph.end_ts).first); + graph.updateMinflt(_ts_to_values.at(graph.end_ts).second); + graph.setOpSeqs(name_to_opseq); + return graph; + } + + void write(std::ostream &os) + { + // Write contents + for (size_t i = 0; i < _graphs.size(); ++i) + { + os << "# Graph " << i << "\n"; + 
_graphs.at(i).write(os); + } + } + + const std::vector<DurationEvent> &_duration_events; + const std::vector<CounterEvent> &_counter_events; + // timestamp to std::pair<maxrss, minflt> + std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values; + std::vector<Graph> _graphs; +}; + +} // namespace + void EventRecorder::emit(const DurationEvent &evt) { std::lock_guard<std::mutex> lock{_mu}; @@ -136,6 +425,9 @@ void EventRecorder::writeToFile(std::ostream &os) case WriteFormat::SNPE_BENCHMARK: writeSNPEBenchmark(os); break; + case WriteFormat::MD_TABLE: + writeMDTable(os); + break; default: assert(!"Invalid value"); break; @@ -258,3 +550,8 @@ void EventRecorder::writeChromeTrace(std::ostream &os) os << " ]\n"; os << "}\n"; } + +void EventRecorder::writeMDTable(std::ostream &os) +{ + MDTableBuilder(_duration_events, _counter_events).build().write(os); +} diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h index 6eea06986..37ec1a0f1 100644 --- a/runtime/onert/core/src/util/EventRecorder.h +++ b/runtime/onert/core/src/util/EventRecorder.h @@ -53,7 +53,8 @@ public: enum class WriteFormat { CHROME_TRACING, - SNPE_BENCHMARK + SNPE_BENCHMARK, + MD_TABLE, }; public: @@ -71,6 +72,7 @@ public: private: void writeSNPEBenchmark(std::ostream &os); void writeChromeTrace(std::ostream &os); + void writeMDTable(std::ostream &os); private: std::mutex _mu; diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc index 9a24f8c1a..95c15049d 100644 --- a/runtime/onert/core/src/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.cc @@ -18,8 +18,6 @@ #include "util/Utils.h" #include "ir/InternalType.h" #include "ir/Shape.h" -#include "ir/operation/AvgPool2D.h" -#include "ir/operation/MaxPool2D.h" #include "util/ShapeInference.h" #include "util/logging.h" @@ -81,10 +79,12 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape // Calculate output height and width of convolution-like operation std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h, const int ker_w, const ir::Padding pad, - const ir::Stride stride) + const ir::Stride stride, + const ir::Dilation dilation = {1, 1}) { int32_t out_h = 0, out_w = 0; - + int32_t effective_filter_w_size = (ker_w - 1) * dilation.width_factor + 1; + int32_t effective_filter_h_size = (ker_h - 1) * dilation.height_factor + 1; switch (pad.type) { case ir::PaddingType::SAME: @@ -92,12 +92,15 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c out_w = ceil_div(in_w, stride.horizontal); break; case ir::PaddingType::VALID: - out_h = ceil_div(in_h - ker_h + 1, stride.vertical); - out_w = ceil_div(in_w - ker_w + 1, stride.horizontal); + out_h = ceil_div(in_h - effective_filter_h_size + 1, stride.vertical); + out_w = ceil_div(in_w - effective_filter_w_size + 1, stride.horizontal); break; case ir::PaddingType::EXPLICIT: - out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1; - out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1; + out_h = + (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1; + out_w = + (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + + 1; break; default: assert(false); @@ -126,17 +129,6 @@ ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank) return out_shape; } -ir::Shape 
inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param, - const ir::Layout layout) -{ - assert(layout == ir::Layout::NHWC); - auto ifm_shape = in_shape.asFeature(layout); - const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, - param.padding, param.stride); - // Pooling don't change number of channels and batch size - return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}; -} - ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> &axes, bool keep_dims) { @@ -320,7 +312,7 @@ ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape assert(ifm_shape.C == kf_shape.C); const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W, - param.padding, param.stride); + param.padding, param.stride, param.dilation); return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N}; } @@ -411,17 +403,6 @@ ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indice return out_shape; } -ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param, - const ir::Layout layout) -{ - assert(layout == ir::Layout::NHWC); - auto ifm_shape = in_shape.asFeature(layout); - const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, - param.padding, param.stride); - // Pooling don't change number of channels and batch size - return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}; -} - ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis) { assert(depth >= 0); @@ -486,6 +467,17 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const return ret; } +ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param, + const ir::Layout layout) +{ + assert(layout == ir::Layout::NHWC); + auto ifm_shape = in_shape.asFeature(layout); + const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, + param.padding, param.stride); + // Pooling doesn't change the number of channels or the batch size + return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}; +} + ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height, const int32_t output_width) { diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index 0f6a2a5d0..480452e01 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -105,40 +105,39 @@ protected: template <typename Param, typename OptionsType> void loadStridesAndPaddings(Param &param, const OptionsType *options); // Load Pool2D param - template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options); + template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options); // Operations void loadConv2D(const Operator *op, ir::Graph &subg); void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); void loadTransposeConv(const Operator *op, ir::Graph &subg); - void loadAvgPool2D(const Operator *op, ir::Graph &subg); + void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); void loadReshape(const Operator *op, ir::Graph &subg); void loadSoftmax(const Operator *op, ir::Graph &subg); - void loadMaxPool2D(const Operator *op, ir::Graph &subg); void
loadConcatenation(const Operator *op, ir::Graph &subg); void loadFill(const Operator *op, ir::Graph &subg); void loadFC(const Operator *op, ir::Graph &subg); - void loadAdd(const Operator *op, ir::Graph &subg); - void loadSub(const Operator *op, ir::Graph &subg); - void loadMul(const Operator *op, ir::Graph &subg); - void loadDiv(const Operator *op, ir::Graph &subg); + template <ir::operation::BinaryArithmetic::ArithmeticType op_type> + void loadBinaryArithmetic(const Operator *op, ir::Graph &subg); + void loadAddV2(const Operator *op, ir::Graph &subg); void loadPack(const Operator *op, ir::Graph &subg); - void loadRelu(const Operator *op, ir::Graph &subg); - void loadRelu6(const Operator *op, ir::Graph &subg); void loadResizeBilinear(const Operator *op, ir::Graph &subg); - void loadRsqrt(const Operator *op, ir::Graph &subg); + void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); void loadSelect(const Operator *op, ir::Graph &subg); - void loadSqrt(const Operator *op, ir::Graph &subg); void loadSquaredDifference(const Operator *op, ir::Graph &subg); - void loadTanh(const Operator *op, ir::Graph &subg); void loadTranspose(const Operator *op, ir::Graph &subg); - void loadReduce(const Operator *op, ir::Graph &subg, - ir::operation::Reduce::ReduceType reduce_type); + template <ir::operation::Reduce::ReduceType reduce_type> + void loadReduce(const Operator *op, ir::Graph &subg); void loadReduceAll(const Operator *op, ir::Graph &subg); void loadReverseV2(const Operator *op, ir::Graph &subg); void loadPad(const Operator *op, ir::Graph &subg); - void loadLogistic(const Operator *op, ir::Graph &subg); - void loadExp(const Operator *op, ir::Graph &subg); + void loadElementwiseActivation(const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseActivation::Type op_type, + float alpha = 0.f, float beta = 0.f); + template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type> + void loadElementwiseBinary(const Operator *op, ir::Graph &subg); + void loadElementwiseUnary(const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseUnary::Type op_type); void loadExpandDims(const Operator *op, ir::Graph &subg); void loadGather(const Operator *op, ir::Graph &subg); void loadCustom(const Operator *op, ir::Graph &subg); @@ -152,35 +151,25 @@ protected: void loadSlice(const Operator *op, ir::Graph &subg); void loadStridedSlice(const Operator *op, ir::Graph &subg); void loadUnpack(const Operator *op, ir::Graph &subg); - void loadMinimum(const Operator *op, ir::Graph &subg); - void loadMaximum(const Operator *op, ir::Graph &subg); - void loadCast(const Operator *op, ir::Graph &subg); void loadComparison(const Operator *op, ir::Graph &subg); void loadEinsum(const Operator *op, ir::Graph &subg); void loadOneHot(const Operator *op, ir::Graph &subg); - void loadAbs(const Operator *op, ir::Graph &subg); - void loadCos(const Operator *op, ir::Graph &subg); - void loadSin(const Operator *op, ir::Graph &subg); void loadShape(const Operator *op, ir::Graph &subg); void loadIf(const Operator *op, ir::Graph &subg); void loadWhile(const Operator *op, ir::Graph &subg); - void loadNeg(const Operator *op, ir::Graph &subg); - void loadLog(const Operator *op, ir::Graph &subg); void loadArgMax(const Operator *op, ir::Graph &subg); - void loadRound(const Operator *op, ir::Graph &subg); void loadPow(const Operator *op, ir::Graph &subg); - void loadLogicalNot(const Operator *op, ir::Graph &subg); - void loadZerosLike(const Operator *op, ir::Graph &subg); void loadTile(const Operator *op, 
ir::Graph &subg); - void loadLogicalOr(const Operator *op, ir::Graph &subg); void loadRange(const Operator *op, ir::Graph &subg); + void loadRank(const Operator *op, ir::Graph &subg); void loadMatrixBandPart(const Operator *op, ir::Graph &subg); void loadBroadcastTo(const Operator *op, ir::Graph &subg); void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); void loadLogSoftmax(const Operator *op, ir::Graph &subg); - void loadQuantize(const Operator *op, ir::Graph &subg); void loadSpaceToDepth(const Operator *op, ir::Graph &subg); void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg); + void loadL2Normalization(const Operator *op, ir::Graph &subg); + void loadLeakyRelu(const Operator *op, ir::Graph &subg); protected: // Base address for mapped region for loading (if needed) @@ -194,6 +183,7 @@ protected: const Model *_model; // Maps Tensor indices to onert Operands. std::vector<ir::OperandIndex> _tensor_to_operand; + std::unordered_map<ir::OperandIndex, std::string> _tensor_names; // Verifier std::unique_ptr<Verifier> _verifier; }; @@ -466,8 +456,8 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten subg.setOperandValue(operand_index, std::move(data_obj)); } - // Name unused - // auto name = tensor->name(); + _tensor_names.emplace(operand_index, tensor->name()->str()); + // Variable if (tensor->is_variable()) throw std::runtime_error("Variable tensor not supported!"); @@ -518,8 +508,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &par template <typename LoaderDomain, typename SpecificLoader> template <typename Param> -void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param, - const Pool2DOptions *options) +void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param, + const Pool2DOptions *options) { // Strides and Paddings loadStridesAndPaddings(param, options); @@ -543,7 +533,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir const auto *options = op->builtin_options_as_Conv2DOptions(); param.activation = convertActivation(options->fused_activation_function()); loadStridesAndPaddings(param, options); - // Dilation h/w factor unused + + param.dilation.width_factor = options->dilation_w_factor(); + param.dilation.height_factor = options->dilation_h_factor(); + std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -585,19 +578,21 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg, + ir::operation::Pool2D::PoolType op_type) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - ir::operation::AvgPool2D::Param param; + ir::operation::Pool2D::Param param; + param.op_type = op_type; const auto *options = op->builtin_options_as_Pool2DOptions(); - loadPool2D(param, options); + loadPool2DOptions(param, options); - std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param)); + std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -645,23 +640,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, i }
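loadConv2D above now forwards dilation_w_factor/dilation_h_factor instead of discarding them, and the calcConvLikeHeightAndWidth change earlier in ShapeInference.cc consumes them: a k-tap kernel with dilation d spans (k - 1) * d + 1 input elements. A quick worked check of the VALID branch with illustrative numbers:

  int in_h = 32, ker_h = 3, dilation_h = 2, stride_v = 1;
  int effective_h = (ker_h - 1) * dilation_h + 1;                  // 5
  int out_h = (in_h - effective_h + 1 + stride_v - 1) / stride_v;  // ceil_div -> 28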
template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - ir::operation::MaxPool2D::Param param; - const auto *options = op->builtin_options_as_Pool2DOptions(); - - loadPool2D(param, options); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op, ir::Graph &subg) { @@ -719,70 +697,82 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Gr } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - ir::operation::Add::Param param; - const auto *options = op->builtin_options_as_AddOptions(); - - param.activation = convertActivation(options->fused_activation_function()); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - ir::operation::Sub::Param param; - const auto *options = op->builtin_options_as_SubOptions(); - - param.activation = convertActivation(options->fused_activation_function()); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - ir::operation::Mul::Param param; - const auto *options = op->builtin_options_as_MulOptions(); + ir::operation::BinaryArithmetic::Param param; + param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD; - param.activation = convertActivation(options->fused_activation_function()); + if (op->custom_options() == nullptr) + { + param.activation = ir::Activation::NONE; + } + else + { + size_t custom_op_data_size = op->custom_options()->size(); + auto custom_op_data = op->custom_options()->Data(); + auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); + auto attr_map = data_root.AsMap(); + const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>( + attr_map["fused_activation_function"].AsInt8()); + param.activation = convertActivation(fused_activation_func); + } - std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::BinaryArithmetic(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, 
ir::Graph &subg) +template <ir::operation::BinaryArithmetic::ArithmeticType op_type> +void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - ir::operation::Div::Param param; - const auto *options = op->builtin_options_as_DivOptions(); - - param.activation = convertActivation(options->fused_activation_function()); + ir::operation::BinaryArithmetic::Param param; + param.arithmetic_type = op_type; + switch (op_type) + { + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + const auto *add_options = op->builtin_options_as_AddOptions(); + param.activation = convertActivation(add_options->fused_activation_function()); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + const auto *sub_options = op->builtin_options_as_SubOptions(); + param.activation = convertActivation(sub_options->fused_activation_function()); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + const auto *mul_options = op->builtin_options_as_MulOptions(); + param.activation = convertActivation(mul_options->fused_activation_function()); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + const auto *div_options = op->builtin_options_as_DivOptions(); + param.activation = convertActivation(div_options->fused_activation_function()); + break; + } + default: + assert(false && + "The function 'loadBinaryArithmetic' supports only BinaryArithmetic operations"); + break; + } - std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::BinaryArithmetic(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -805,26 +795,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir:: } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation( + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, + float alpha, float beta) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} + ir::operation::ElementwiseActivation::Param param; + param.op_type = op_type; + param.alpha = alpha; + param.beta = beta; -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ElementwiseActivation(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -856,38 +842,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence 
inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto size = inputs.at(1); - std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} + if (!subg.operands().at(size).isConstant()) + throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported."); -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; + std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>(); - loadOperationIO(op, inputs, outputs); + ir::operation::ResizeNearestNeighbor::Param param; + param.height_out = size_v[0]; + param.width_out = size_v[1]; + param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners(); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ResizeNearestNeighbor({input}, outputs, param)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs)); subg.addOperation(std::move(new_op)); } @@ -905,18 +893,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Opera } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -937,8 +913,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce( - const Operator *op, ir::Graph &subg, ir::operation::Reduce::ReduceType reduce_type) +template <ir::operation::Reduce::ReduceType reduce_type> +void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; @@ -1005,26 +981,49 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::G } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg) +template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type> +void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, 
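// Two notes on loadResizeNearestNeighbor above. The 'size' operand must be a constant
// because height_out and width_out are fixed fields of the operation's Param. And the
// '.template asVector<std::int32_t>()' spelling is required C++ here: inside this class
// template, operands() is a dependent expression, so 'template' tells the parser that
// asVector names a member template rather than beginning a '<' comparison.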
outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs)); + ir::operation::ElementwiseBinary::Param param; + param.op_type = op_type; + + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ElementwiseBinary(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary( + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs)); + ir::operation::ElementwiseUnary::Param param; + param.op_type = op_type; + + if (op_type == ir::operation::ElementwiseUnary::Type::CAST) + { + auto qasymm8ToUint8 = [](ir::Operand &operand) { + if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) + { + operand.type(ir::DataType::UINT8); + } + }; + qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT))); + qasymm8ToUint8(subg.operands().at(outputs.at(0))); + } + + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ElementwiseUnary(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -1177,6 +1176,17 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const } template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs)); + subg.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1197,7 +1207,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir Einsum, BroadcastTo, FusedBatchNorm, - StatelessRandomUniform + StatelessRandomUniform, + Erf }; // Mapping from custom op name string to BuiltinOP enum @@ -1210,6 +1221,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, {"BroadcastTo", BuiltinOP::BroadcastTo}, {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, + {"Erf", BuiltinOP::Erf}, }; try @@ -1219,7 +1231,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir switch (custom_op_id) { case BuiltinOP::AddV2: - loadAdd(op, subg); + loadAddV2(op, subg); break; case BuiltinOP::ReduceAll: loadReduceAll(op, subg); @@ -1242,6 +1254,9 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir case BuiltinOP::StatelessRandomUniform: loadStatelessRandomUniform(op, subg); break; + case BuiltinOP::Erf: + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF); + break; default: throw std::runtime_error{ "Loader: Custom OP map is defined but operation loader function is not defined"}; @@ -1396,51 +1411,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, 
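// The CAST special case in loadElementwiseUnary above mirrors the NNAPI frontend change
// later in this patch: QUANT_UINT8_ASYMM and UINT8 share a storage format, and retyping
// both endpoints to UINT8 lets the cast kernel treat the data as plain integers rather
// than applying the quantization scale and zero point.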
SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - auto qasymm8ToUint8 = [](ir::Operand &operand) { - if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) - { - operand.type(ir::DataType::UINT8); - } - }; - qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT))); - qasymm8ToUint8(subg.operands().at(outputs.at(0))); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1562,42 +1532,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadCos(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Cos(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1652,18 +1586,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir: } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadNeg(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Neg(inputs, outputs)); - 
subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1697,30 +1619,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLog(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Log(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRound(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Round(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1733,31 +1631,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::G } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalNot(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalNot(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadZerosLike(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::ZerosLike(inputs, outputs)); - - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1787,18 +1660,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir:: } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalOr(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalOr(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1817,18 +1678,27 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence inputs; 
ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg) +{ + float alpha = op->builtin_options_as_LeakyReluOptions()->alpha(); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha, + 1.f); +} + +template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg) { const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); @@ -1839,7 +1709,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadConv2D(op, subg); return; case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D: - loadAvgPool2D(op, subg); + loadPool2D(op, subg, ir::operation::Pool2D::PoolType::AVG); return; case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D: loadDepthwiseConv2D(op, subg); @@ -1854,7 +1724,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadSoftmax(op, subg); return; case BuiltinOperator::BuiltinOperator_MAX_POOL_2D: - loadMaxPool2D(op, subg); + loadPool2D(op, subg, ir::operation::Pool2D::PoolType::MAX); return; case BuiltinOperator::BuiltinOperator_CONCATENATION: loadConcatenation(op, subg); @@ -1863,31 +1733,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadFC(op, subg); return; case BuiltinOperator::BuiltinOperator_ADD: - loadAdd(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg); return; case BuiltinOperator::BuiltinOperator_SUB: - loadSub(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg); return; case BuiltinOperator::BuiltinOperator_MUL: - loadMul(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg); return; case BuiltinOperator::BuiltinOperator_DIV: - loadDiv(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg); return; case BuiltinOperator::BuiltinOperator_PACK: loadPack(op, subg); return; case BuiltinOperator::BuiltinOperator_RELU: - loadRelu(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, + ir::operation::ElementwiseActivation::infinity, 0.f); + return; + case BuiltinOperator::BuiltinOperator_RELU_N1_TO_1: + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 1.f, + -1.f); return; case BuiltinOperator::BuiltinOperator_RELU6: - loadRelu6(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 6.f, + 0.f); return; case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR: loadResizeBilinear(op, subg); return; + case BuiltinOperator::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: + loadResizeNearestNeighbor(op, subg); + return; case BuiltinOperator::BuiltinOperator_RSQRT: - loadRsqrt(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT); return; case BuiltinOperator::BuiltinOperator_SELECT: loadSelect(op, subg); @@ -1897,37 +1776,39 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const 
Operator *op, loadSelect(op, subg); return; case BuiltinOperator::BuiltinOperator_SQRT: - loadSqrt(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT); return; case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE: loadSquaredDifference(op, subg); return; case BuiltinOperator::BuiltinOperator_TANH: - loadTanh(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f, + 1.f); return; case BuiltinOperator::BuiltinOperator_TRANSPOSE: loadTranspose(op, subg); return; case BuiltinOperator::BuiltinOperator_MEAN: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN); + loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg); return; case BuiltinOperator::BuiltinOperator_REDUCE_ANY: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY); + loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg); return; case BuiltinOperator::BuiltinOperator_REDUCE_MAX: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX); + loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg); return; case BuiltinOperator::BuiltinOperator_REVERSE_V2: loadReverseV2(op, subg); return; case BuiltinOperator::BuiltinOperator_PAD: + case BuiltinOperator::BuiltinOperator_PADV2: loadPad(op, subg); return; case BuiltinOperator::BuiltinOperator_LOGISTIC: - loadLogistic(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC); return; case BuiltinOperator::BuiltinOperator_EXP: - loadExp(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP); return; case BuiltinOperator::BuiltinOperator_EXPAND_DIMS: loadExpandDims(op, subg); @@ -1942,7 +1823,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadBatchToSpaceND(op, subg); return; case BuiltinOperator::BuiltinOperator_SUM: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM); + loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg); return; case BuiltinOperator::BuiltinOperator_CUSTOM: loadCustom(op, subg); @@ -1969,13 +1850,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadUnpack(op, subg); return; case BuiltinOperator::BuiltinOperator_MINIMUM: - loadMinimum(op, subg); + loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg); return; case BuiltinOperator::BuiltinOperator_MAXIMUM: - loadMaximum(op, subg); + loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg); return; case BuiltinOperator::BuiltinOperator_CAST: - loadCast(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST); return; case BuiltinOperator::BuiltinOperator_EQUAL: case BuiltinOperator::BuiltinOperator_NOT_EQUAL: @@ -1989,19 +1870,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadOneHot(op, subg); return; case BuiltinOperator::BuiltinOperator_ABS: - loadAbs(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ABS); return; case BuiltinOperator::BuiltinOperator_COS: - loadCos(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::COS); return; case BuiltinOperator::BuiltinOperator_SIN: - loadSin(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN); return; case BuiltinOperator::BuiltinOperator_SHAPE: loadShape(op, subg); return; case BuiltinOperator::BuiltinOperator_REDUCE_PROD: - 
loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD); + loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg); return; case BuiltinOperator::BuiltinOperator_IF: loadIf(op, subg); @@ -2010,31 +1891,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadWhile(op, subg); return; case BuiltinOperator::BuiltinOperator_NEG: - loadNeg(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG); return; case BuiltinOperator::BuiltinOperator_ARG_MAX: loadArgMax(op, subg); return; case BuiltinOperator::BuiltinOperator_LOG: - loadLog(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG); return; case BuiltinOperator::BuiltinOperator_ROUND: - loadRound(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND); return; case BuiltinOperator::BuiltinOperator_POW: loadPow(op, subg); return; case BuiltinOperator::BuiltinOperator_LOGICAL_NOT: - loadLogicalNot(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT); return; case BuiltinOperator::BuiltinOperator_LOGICAL_OR: - loadLogicalOr(op, subg); + loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>( + op, subg); return; case BuiltinOperator::BuiltinOperator_FILL: loadFill(op, subg); return; case BuiltinOperator::BuiltinOperator_ZEROS_LIKE: - loadZerosLike(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE); return; case BuiltinOperator::BuiltinOperator_TILE: loadTile(op, subg); @@ -2049,11 +1931,20 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadLogSoftmax(op, subg); return; case BuiltinOperator::BuiltinOperator_QUANTIZE: - loadQuantize(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE); return; case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH: loadSpaceToDepth(op, subg); return; + case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION: + loadL2Normalization(op, subg); + break; + case BuiltinOperator::BuiltinOperator_LEAKY_RELU: + loadLeakyRelu(op, subg); + return; + case BuiltinOperator::BuiltinOperator_RANK: + loadRank(op, subg); + return; default: throw std::runtime_error( std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index 96dd4698a..92a9ee7a5 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -103,12 +103,14 @@ public: // Set inputs for (const std::int32_t input_ind : *circle_subg->inputs()) { - subg->addInput(tensorIdxToOperandIdx(input_ind)); + subg->addInput(tensorIdxToOperandIdx(input_ind), + _tensor_names.at(_tensor_to_operand[input_ind])); } // Set outputs for (const std::int32_t output_ind : *circle_subg->outputs()) { - subg->addOutput(tensorIdxToOperandIdx(output_ind)); + subg->addOutput(tensorIdxToOperandIdx(output_ind), + _tensor_names.at(_tensor_to_operand[output_ind])); } // Create operations for (const auto *op : *circle_subg->operators()) diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc index 8ff6cbbfd..8e3d83db4 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc @@ -83,6 +83,189 @@ uint32_t 
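// The circle loader above now registers each subgraph input and output together with its
// tensor name (the new two-argument addInput/addOutput overloads); this reading assumes
// _tensor_to_operand maps flatbuffer tensor indices to IR operand indices and
// _tensor_names maps operand indices to names. The tflite loader receives the identical
// change near the end of this patch.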
getUint32Scalar(Operands &operands, const OperandIndex index) } OperationFactory::Generator +getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type, + float alpha = 0.f, float beta = 0.f) +{ + return [op_type, alpha, beta](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::ElementwiseActivation::Param param; + param.op_type = op_type; + param.alpha = alpha; + param.beta = beta; + + return new operation::ElementwiseActivation{inputs, outputs, param}; + }; +} + +OperationFactory::Generator getElementwiseBinaryGenerator( + const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) +{ + return [op_type](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Lefthand side operand + // 1 -> Righthand side operand + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::ElementwiseBinary::Param param; + param.op_type = op_type; + + return new operation::ElementwiseBinary{inputs, outputs, param}; + }; +} + +OperationFactory::Generator +getElementwiseUnaryGenerator(const onert::ir::operation::ElementwiseUnary::Type op_type) +{ + return [op_type](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 1); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::ElementwiseUnary::Param param; + param.op_type = op_type; + + if (op_type == operation::ElementwiseUnary::Type::CAST) + { + // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's + // input/output + if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) + { + replaceDataType(operands, inputs.at(0), DataType::UINT8); + } + if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) + { + replaceDataType(operands, outputs.at(0), DataType::UINT8); + } + } + + return new operation::ElementwiseUnary{inputs, outputs, param}; + }; +} + +OperationFactory::Generator +getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::ArithmeticType op_type) +{ + return [op_type](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Lefthand side operand + // 1 -> Righthand side operand + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::BinaryArithmetic::Param param; + param.arithmetic_type = op_type; + const auto activation_index = OperandIndex{init_param.inputs[2]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::BinaryArithmetic{inputs, outputs, param}; + }; +} + +OperationFactory::Generator +getPool2DGenerator(const 
onert::ir::operation::Pool2D::PoolType pool_type) +{ + return [pool_type](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.output_count == 1); + + // In common + // 0 -> IFM Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::Pool2D::Param param; + param.op_type = pool_type; + if (init_param.input_count == 7) // support implicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 2 -> Horizontal (over width) Stride Index + // 3 -> Vertical (over height) Stride Index + // 4 -> Filter Width Index + // 5 -> Filter Height Index + // 6 -> FuseCode (activation) Index + + const auto padding_index = OperandIndex{init_param.inputs[1]}; + const auto hstride_index = OperandIndex{init_param.inputs[2]}; + const auto vstride_index = OperandIndex{init_param.inputs[3]}; + const auto kw_index = OperandIndex{init_param.inputs[4]}; + const auto kh_index = OperandIndex{init_param.inputs[5]}; + const auto activation_index = OperandIndex{init_param.inputs[6]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = operands.at(kh_index).asScalar<uint32_t>(); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else // support explicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding_left index + // 2 -> Padding_right index + // 3 -> Padding_top index + // 4 -> Padding_bottom index + // 5 -> Horizontal (over width) Stride Index + // 6 -> Vertical (over height) Stride Index + // 7 -> Filter Width Index + // 8 -> Filter Height Index + // 9 -> FuseCode (activation) Index + + const auto padding_left_index = OperandIndex{init_param.inputs[1]}; + const auto padding_right_index = OperandIndex{init_param.inputs[2]}; + const auto padding_top_index = OperandIndex{init_param.inputs[3]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; + const auto hstride_index = OperandIndex{init_param.inputs[5]}; + const auto vstride_index = OperandIndex{init_param.inputs[6]}; + const auto kw_index = OperandIndex{init_param.inputs[7]}; + const auto kh_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + return new operation::Pool2D{inputs, outputs, param}; + }; +} + +OperationFactory::Generator getReduceGenerator(const onert::ir::operation::Reduce::ReduceType reduce_type) { return [reduce_type](const OperationFactory::Param &init_param, Operands &operands) { @@ -133,79 +316,24 @@ Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Opera return new T{inputs, outputs}; } -// A generator function for 
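// All pooling entries in the factory now share the generator above; as registered later
// in this file:
//   _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX);
//   _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG);
//   _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2);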
binary ops with no params -template <typename T> -Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands) +OperationFactory::Generator getComparisonGenerator(operation::Comparison::ComparisonType type) { - assert(init_param.input_count == 7 || init_param.input_count == 10); - assert(init_param.output_count == 1); + return [type](const OperationFactory::Param &init_param, Operands &) -> Operation * { + assert(init_param.input_count == 2 && init_param.output_count == 1); - // In common - // 0 -> IFM Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - OperandIndexSequence outputs{init_param.outputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; - typename T::Param param; - if (init_param.input_count == 7) // support implicit padding - { // Each input should be interpreted as follows: // - // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index - // 2 -> Horizontal (over width) Stride Index - // 3 -> Vertial (over height) Stride Index - // 4 -> Filter Width Index - // 5 -> Filter Height Index - // 6 -> FuseCode (activation) Index - - const auto padding_index = OperandIndex{init_param.inputs[1]}; - const auto hstride_index = OperandIndex{init_param.inputs[2]}; - const auto vstride_index = OperandIndex{init_param.inputs[3]}; - const auto kw_index = OperandIndex{init_param.inputs[4]}; - const auto kh_index = OperandIndex{init_param.inputs[5]}; - const auto activation_index = OperandIndex{init_param.inputs[6]}; + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); - param.stride = makeStride(operands, hstride_index, vstride_index); - param.kw = getUint32Scalar(operands, kw_index); - param.kh = operands.at(kh_index).asScalar<uint32_t>(); - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - } - else // support explicit padding - { - // Each input should be interpreted as follows: - // - // 1 -> Padding_left index - // 2 -> Padding_right index - // 3 -> Padding_top index - // 4 -> Padding_bottom index - // 5 -> Horizontal (over width) Stride Index - // 6 -> Vertial (over height) Stride Index - // 7 -> Filter Width Index - // 8 -> Filter Height Index - // 9 -> FuseCode (activation) Index - - const auto padding_left_index = OperandIndex{init_param.inputs[1]}; - const auto padding_right_index = OperandIndex{init_param.inputs[2]}; - const auto padding_top_index = OperandIndex{init_param.inputs[3]}; - const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; - const auto hstride_index = OperandIndex{init_param.inputs[5]}; - const auto vstride_index = OperandIndex{init_param.inputs[6]}; - const auto kw_index = OperandIndex{init_param.inputs[7]}; - const auto kh_index = OperandIndex{init_param.inputs[8]}; - const auto activation_index = OperandIndex{init_param.inputs[9]}; - - param.padding.type = PaddingType::EXPLICIT; - param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, - padding_top_index, padding_bottom_index); - param.stride = makeStride(operands, hstride_index, vstride_index); - param.kw = getUint32Scalar(operands, kw_index); - param.kh = getUint32Scalar(operands, kh_index); - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - } - return new T{inputs, 
outputs, param}; + operation::Comparison::Param param; + param.comparison_type = type; + + return new operation::Comparison{inputs, outputs, param}; + }; } // namespace @@ -295,9 +423,9 @@ OperationFactory::OperationFactory() return new operation::DepthwiseConv2D{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>; + _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX); - _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>; + _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG); _map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -383,27 +511,8 @@ OperationFactory::OperationFactory() return new operation::Softmax{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // 0 -> input Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - - // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output - if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) - { - replaceDataType(operands, inputs.at(0), DataType::UINT8); - } - if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) - { - replaceDataType(operands, outputs.at(0), DataType::UINT8); - } - - return new operation::Cast{inputs, outputs}; - }; + _map[ANEURALNETWORKS_CAST] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); // ANEURALNETWORKS_CAST_EX is deprecated // TODO Remove ANEURALNETWORKS_CAST_EX @@ -416,7 +525,8 @@ OperationFactory::OperationFactory() // inputCount is either 7 or 10 according to NN API specification. 
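// (This revision also accepts 13 inputs: the explicit-padding form plus dilation; see the
// widened assert and the input_count == 13 branch below. Per the NN API specification,
// input 10 is the optional NCHW data-layout flag, which this loader does not read.)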
// - Padding is implicit when inputCount is 7 // - Padding is explicit when inputCount is 10 - assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.input_count == 7 || init_param.input_count == 10 || + init_param.input_count == 13); assert(init_param.output_count == 1); // 0 -> IFM Tensor Index @@ -427,7 +537,6 @@ OperationFactory::OperationFactory() OperandIndexSequence outputs{init_param.outputs[0]}; Conv2D::Param param; - if (init_param.input_count == 7) // support implicit padding { // Each input should be interpreted as follows: @@ -445,6 +554,10 @@ OperationFactory::OperationFactory() param.padding.type = NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); + + param.dilation.width_factor = 1; + param.dilation.height_factor = 1; + param.activation = NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } @@ -472,34 +585,62 @@ OperationFactory::OperationFactory() param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, padding_top_index, padding_bottom_index); param.stride = makeStride(operands, hstride_index, vstride_index); + + param.dilation.width_factor = 1; + param.dilation.height_factor = 1; + param.activation = NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } + else if (init_param.input_count == 13) // support dilation + { + // Each input should be interpreted as follows: + // + // 3 -> Padding_left Index + // 4 -> Padding_right Index + // 5 -> Padding_top Index + // 6 -> Padding_bottom Index + // 7 -> Stride (width) Index + // 8 -> Stride (height) Index + // 9 -> Activation Index + // 11 -> Dilation (width_factor) Index + // 12 -> Dilation (height_factor) Index - return new Conv2D{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3); - assert(init_param.output_count == 1); + const auto padding_left_index = OperandIndex{init_param.inputs[3]}; + const auto padding_right_index = OperandIndex{init_param.inputs[4]}; + const auto padding_top_index = OperandIndex{init_param.inputs[5]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[6]}; + const auto hstride_index = OperandIndex{init_param.inputs[7]}; + const auto vstride_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + const auto width_factor_index = OperandIndex{init_param.inputs[11]}; + const auto height_factor_index = OperandIndex{init_param.inputs[12]}; - // Each input should be interpreted as follows: - // - // 0 -> Lefthand side operand - // 1 -> Righthand side operand + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - OperandIndexSequence outputs{init_param.outputs[0]}; + auto width_factor = operands.at(width_factor_index).asScalar<int32_t>(); + auto height_factor = operands.at(height_factor_index).asScalar<int32_t>(); - operation::Add::Param param; + param.dilation.width_factor = width_factor; + param.dilation.height_factor = height_factor; - const auto activation_index = OperandIndex{init_param.inputs[2]}; - 
param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else + { + throw std::runtime_error{"Conv2D: unsupported input operand count"}; + } - return new operation::Add{inputs, outputs, param}; + return new Conv2D{inputs, outputs, param}; }; + _map[ANEURALNETWORKS_ADD] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); + _map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD]; _map[ANEURALNETWORKS_REDUCE_SUM] = @@ -509,26 +650,8 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM]; - _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3); - assert(init_param.output_count == 1); - - // Each input should be interpreted as follows: - // - // 0 -> Lefthand side operand - // 1 -> Righthand side operand - - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - OperandIndexSequence outputs{init_param.outputs[0]}; - - operation::Sub::Param param; - - const auto activation_index = OperandIndex{init_param.inputs[2]}; - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - - return new operation::Sub{inputs, outputs, param}; - }; + _map[ANEURALNETWORKS_SUB] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -611,27 +734,8 @@ OperationFactory::OperationFactory() return new operation::Transpose{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> LHS Tensor Index - // 1 -> RHS Tensor Index - // 2 -> Activation Index - - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Mul::Param param; - - const auto activation_index = OperandIndex{init_param.inputs[2]}; - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - - return new operation::Mul{inputs, outputs, param}; - }; + _map[ANEURALNETWORKS_MUL] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -672,34 +776,18 @@ OperationFactory::OperationFactory() return new operation::Squeeze{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>; + _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); - _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>; + _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG); - _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>; + _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); - 
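// A note on the (alpha, beta) arguments fed to getElementwiseActivationGenerator here and
// below, read from the onert IR rather than stated in this patch: for TANH the kernel
// computes alpha * tanh(beta * x), so (1.f, 1.f) is plain tanh; for the RELU family alpha
// is the upper clamp and beta the lower, giving RELU (infinity, 0.f), RELU6 (6.f, 0.f)
// and RELU1 (1.f, -1.f).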
_map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3 && init_param.output_count == 1); + _map[ANEURALNETWORKS_DIV] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> LHS Tensor Index - // 1 -> RHS Tensor Index - // 2 -> Activation Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Div::Param param; - - const auto activation_index = OperandIndex{init_param.inputs[2]}; - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - - return new operation::Div{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>; + _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP); // ANEURALNETWORKS_EXP_EX is deprecated // TODO Remove ANEURALNETWORKS_EXP_EX @@ -710,39 +798,17 @@ OperationFactory::OperationFactory() // 1 -> Axis Tensor Index _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>; - _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Greater; - - return new operation::Comparison{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual; - - return new operation::Comparison{inputs, outputs, param}; - }; + _map[ANEURALNETWORKS_GREATER] = + getComparisonGenerator(operation::Comparison::ComparisonType::Greater); + _map[ANEURALNETWORKS_GREATER_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); + _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less); + _map[ANEURALNETWORKS_LESS_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); + _map[ANEURALNETWORKS_NOT_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); + _map[ANEURALNETWORKS_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::Equal); // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX @@ -767,40 +833,6 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input 
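// getComparisonGenerator (defined near the top of this file) now serves GREATER,
// GREATER_EQUAL, LESS, LESS_EQUAL, NOT_EQUAL and EQUAL above; only the deprecated *_EX
// entries keep hand-written lambdas, apparently because they carry extra operand
// retyping (BOOL8 handling) that the shared generator does not perform.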
should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Less; - - return new operation::Comparison{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::LessEqual; - - return new operation::Comparison{inputs, outputs, param}; - }; - // ANEURALNETWORKS_LESS_EX is deprecated // TODO Remove ANEURALNETWORKS_LESS_EX _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param, @@ -837,23 +869,6 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX]; - _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input1 Tensor Index - // 1 -> input2 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::NotEqual; - - return new operation::Comparison{inputs, outputs, param}; - }; - // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param, @@ -877,7 +892,8 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>; + _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator( + operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX @@ -898,10 +914,14 @@ OperationFactory::OperationFactory() replaceDataType(operands, inputs.at(1), DataType::BOOL8); replaceDataType(operands, outputs.at(0), DataType::BOOL8); - return new operation::LogicalAnd{inputs, outputs}; + operation::ElementwiseBinary::Param param; + param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND; + + return new operation::ElementwiseBinary{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>; + _map[ANEURALNETWORKS_RSQRT] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); _map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -937,7 +957,9 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_RSQRT_EX _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; - _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>; + _map[ANEURALNETWORKS_RELU] = + 
getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, + onert::ir::operation::ElementwiseActivation::infinity, 0); _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -960,9 +982,11 @@ OperationFactory::OperationFactory() return new operation::ResizeBilinear{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>; + _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); - _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>; + _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); _map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1009,17 +1033,8 @@ OperationFactory::OperationFactory() return new operation::RNN{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // 0 -> input Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - - return new operation::Floor{inputs, outputs}; - }; + _map[ANEURALNETWORKS_FLOOR] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1059,7 +1074,7 @@ OperationFactory::OperationFactory() return new operation::SpaceToDepth{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>; + _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2); _map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1157,35 +1172,15 @@ OperationFactory::OperationFactory() return new operation::TransposeConv{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // 0 -> input Tensor Index - - OperandIndexSequence inputs{init_param.inputs[0]}; - return new operation::SQRT{inputs, outputs}; - }; + _map[ANEURALNETWORKS_SQRT] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); // ANEURALNETWORKS_SQRT_EX is deprecated // TODO Remove ANEURALNETWORKS_SQRT_EX _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; - _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - return new operation::LogicalOr{inputs, outputs}; - }; + _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator( + operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated // TODO Remove 
ANEURALNETWORKS_LOGICAL_OR_EX @@ -1206,10 +1201,14 @@ OperationFactory::OperationFactory() replaceDataType(operands, inputs.at(1), DataType::BOOL8); replaceDataType(operands, outputs.at(0), DataType::BOOL8); - return new operation::LogicalOr{inputs, outputs}; + operation::ElementwiseBinary::Param param; + param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR; + + return new operation::ElementwiseBinary{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>; + _map[ANEURALNETWORKS_LOGICAL_NOT] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX @@ -1228,7 +1227,10 @@ OperationFactory::OperationFactory() replaceDataType(operands, inputs.at(0), DataType::BOOL8); replaceDataType(operands, outputs.at(0), DataType::BOOL8); - return new operation::LogicalNot{inputs, outputs}; + operation::ElementwiseUnary::Param param; + param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT; + + return new operation::ElementwiseUnary{inputs, outputs, param}; }; _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -1306,23 +1308,6 @@ OperationFactory::OperationFactory() return new operation::LSTM{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Equal; - - return new operation::Comparison{inputs, outputs, param}; - }; - // ANEURALNETWORKS_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_EQUAL_EX _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param, @@ -1409,13 +1394,13 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_GATHER_EX _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER]; - _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>; + _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG); // ANEURALNETWORKS_NEG_EX is deprecated // TODO Remove ANEURALNETWORKS_NEG_EX _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG]; - _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>; + _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS); // ANEURALNETWORKS_ABS_EX is deprecated // TODO Remove ANEURALNETWORKS_ABS_EX @@ -1434,6 +1419,8 @@ OperationFactory::OperationFactory() operation::ArgMax::Param param; param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + // NNAPI ARGMAX output type is always int32 + param.output_type = DataType::INT32; return new operation::ArgMax{inputs, outputs, param}; }; @@ -1442,7 +1429,8 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_ARGMAX_EX _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX]; - _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>; + _map[ANEURALNETWORKS_DEQUANTIZE] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); _map[ANEURALNETWORKS_MEAN] = 
  _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
    assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1600,9 +1588,11 @@ OperationFactory::OperationFactory()

  _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];

-  _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
+  _map[ANEURALNETWORKS_MINIMUM] =
+      getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);

-  _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
+  _map[ANEURALNETWORKS_MAXIMUM] =
+      getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);

  _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
                                        Operands &operands) {
@@ -1628,23 +1618,10 @@ OperationFactory::OperationFactory()
    return new operation::OneHot{inputs, outputs, param};
  };

-  _map[ANEURALNETWORKS_COS_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Cos{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_COS_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);

-  _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Sin{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);

  _map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
    assert(init_param.input_count == 1 && init_param.output_count == 1);
@@ -1658,17 +1635,8 @@ OperationFactory::OperationFactory()
  _map[ANEURALNETWORKS_REDUCE_PROD] =
      getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);

-  _map[ANEURALNETWORKS_ROUND_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    // 0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::Round{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_ROUND_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);

  _map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
    assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1695,18 +1663,8 @@ OperationFactory::OperationFactory()
  // 1 -> A 1-D tensor, specifying the value
  _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;

-  _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    // 0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::ZerosLike{inputs, outputs};
-  };
-
+  _map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
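MINIMUM and MAXIMUM move to the analogous binary generator. Again a sketch inferred from the call sites (MIN, MAX, and LOGICAL_OR earlier), not the actual helper:

auto getElementwiseBinaryGenerator(const operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
  // Mirrors getElementwiseUnaryGenerator, but for two-input operations.
  return [op_type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
    assert(init_param.input_count == 2 && init_param.output_count == 1);

    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
    OperandIndexSequence outputs{init_param.outputs[0]};

    operation::ElementwiseBinary::Param param;
    param.op_type = op_type; // MIN, MAX, LOGICAL_OR, ...

    return new operation::ElementwiseBinary{inputs, outputs, param};
  };
}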
  // Each input should be interpreted as follows:
  // 0 -> Input Tensor Index
  // 1 -> Multiple Tensor Index
@@ -1845,14 +1803,8 @@ OperationFactory::OperationFactory()
    return new operation::LogSoftmax{inputs, outputs, param};
  };

-  _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Quantize{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_QUANTIZE] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}

Operation *OperationFactory::create(ANeuralNetworksOperationType type,
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 86c2c6bc7..7eef15717 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -90,12 +90,14 @@ public:
    // Set inputs
    for (const std::int32_t input_ind : *tflite_subg->inputs())
    {
-      subg->addInput(tensorIdxToOperandIdx(input_ind));
+      subg->addInput(tensorIdxToOperandIdx(input_ind),
+                     _tensor_names.at(_tensor_to_operand[input_ind]));
    }
    // Set outputs
    for (const std::int32_t output_ind : *tflite_subg->outputs())
    {
-      subg->addOutput(tensorIdxToOperandIdx(output_ind));
+      subg->addOutput(tensorIdxToOperandIdx(output_ind),
+                      _tensor_names.at(_tensor_to_operand[output_ind]));
    }
    // Create operations
    for (const auto *op : *tflite_subg->operators())
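The loader now attaches the original TFLite tensor names to the subgraph's inputs and outputs. The two members used above are not defined in this hunk; a sketch of the assumed bookkeeping behind them:

// Assumed shapes of the two loader members (not shown in this hunk):
//   _tensor_to_operand : TFLite tensor index -> onert operand index
//   _tensor_names      : operand index -> original tensor name
std::unordered_map<int32_t, onert::ir::OperandIndex> _tensor_to_operand;
std::unordered_map<onert::ir::OperandIndex, std::string> _tensor_names;

// Hypothetical fill-in while tensors are loaded; tflite::Tensor::name() is the
// flatbuffers-generated accessor and may be null, since names are optional.
void registerTensorName(int32_t tensor_ind, onert::ir::OperandIndex operand_ind,
                        const tflite::Tensor *tensor)
{
  _tensor_to_operand[tensor_ind] = operand_ind;
  if (tensor->name() != nullptr)
    _tensor_names[operand_ind] = tensor->name()->str();
}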
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/Scheduler.cc
index 94f51ddd6..50f3964db 100644
--- a/runtime/onert/test/core/compiler/Scheduler.cc
+++ b/runtime/onert/test/core/compiler/Scheduler.cc
@@ -22,9 +22,7 @@
 #include <ir/TypeInfo.h>
 #include <ir/DataType.h>

-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
+#include <ir/operation/BinaryArithmetic.h>
 #include <ir/operation/FullyConnected.h>

 #include <gtest/gtest.h>
@@ -209,8 +207,7 @@ using OIS = OperandIndexSequence;
template <typename NodeT, typename... Types>
OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
{
-  typename NodeT::Param op_params{Activation::NONE};
-  auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+  auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
  auto op_idx = graph->addOperation(std::move(op));
  // For now in scheduler test all operations in tested graphs has same size (for simplicity)
  assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
@@ -227,17 +224,20 @@ std::shared_ptr<Graph> createStraightGraph()
  auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);

  // Create sub node
  auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);

  // Create mul node
  auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+  BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);

  graph->finishBuilding();
  return graph;
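With the hard-coded Activation::NONE removed from create<>, each call site now passes its own Param. The two initialization styles used in this patch are interchangeable, assuming the member order implied by the aggregate initializers above:

// Aggregate initialization, as in the Scheduler tests:
BinaryArithmetic::Param p1{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};

// Field-by-field initialization, as in the executor tests further down:
operation::BinaryArithmetic::Param p2;
p2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
p2.activation = Activation::NONE;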
@@ -261,31 +261,39 @@ std::shared_ptr<Graph> createBranchedGraph()
  auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);

  // Create mul1 node
  auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+  BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+                           mul1_op_params);

  // Create mul2 node
  auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+  BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+                           mul2_op_params);

  // Create fc1 node
  auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+  FullyConnected::Param fc1_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);

  // Create fc2 node
  auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+  FullyConnected::Param fc2_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);

-  // Create add2 node
+  // Create sub node
  auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);

  graph->finishBuilding();
  return graph;
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
index 0fcf372c3..806b47ecc 100644
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ b/runtime/onert/test/core/exec/ExecInstance.cc
@@ -20,7 +20,7 @@
 #include "ir/Graph.h"
 #include "compiler/Compiler.h"
 #include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"

 namespace
 {
@@ -54,16 +54,20 @@ public:
        .at(operand_rhs2)
        .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
    // 2nd add operations (result2 <= result1 + rhs2)
-    operation::Add::Param param1;
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param1.activation = Activation::NONE;
    auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
    auto output_set1 = OperandIndexSequence{operand_result1};
-    graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
-    operation::Add::Param param2;
+    graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param2.activation = Activation::NONE;
    auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
    auto output_set2 = OperandIndexSequence{operand_result2};
-    graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+    graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
    // Identify model inputs and outputs
    graph->addInput(operand_lhs);
    graph->addInput(operand_rhs1);
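The same three-line ADD setup now recurs in every executor and interpreter test; a small test-local helper could cut the repetition. A hypothetical sketch, not part of the patch:

// Hypothetical helper; only types introduced by this patch are used.
static operation::BinaryArithmetic::Param
makeBinaryParam(operation::BinaryArithmetic::ArithmeticType type,
                Activation act = Activation::NONE)
{
  operation::BinaryArithmetic::Param param;
  param.arithmetic_type = type;
  param.activation = act;
  return param;
}

// e.g. auto param1 = makeBinaryParam(operation::BinaryArithmetic::ArithmeticType::ADD);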
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 2e295ef40..09190bc58 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -21,7 +21,7 @@
 #include "ir/Graph.h"
 #include "interp/InterpExecutor.h"
 #include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"

 namespace
 {
@@ -57,11 +57,13 @@ protected:

    // Add operations

-    operation::Add::Param param;
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param.activation = Activation::NONE;
    auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
    auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));

    // Identify model inputs and outputs

@@ -112,17 +114,21 @@ protected:

    // 2nd add operations (result2 <= result1 + rhs2)

-    operation::Add::Param param1;
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param1.activation = Activation::NONE;
    auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
    auto output_set1 = OperandIndexSequence{operand_result1};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));

-    operation::Add::Param param2;
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param2.activation = Activation::NONE;
    auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
    auto output_set2 = OperandIndexSequence{operand_result2};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));

    // Identify model inputs and outputs

@@ -170,11 +176,13 @@ protected:

    // Add operations

-    operation::Add::Param param;
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param.activation = Activation::NONE;
    auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
    auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));

    // Identify model inputs and outputs

diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index 63a948d7b..aab33fab5 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -47,8 +47,9 @@ TEST(ShapeInference, Pool2DNodeSame)
  Stride stride{3, 7};
  Padding padding{PaddingType::SAME};

-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -56,8 +57,9 @@ TEST(ShapeInference, Pool2DNodeSame)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);

-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
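All three pooling tests now go through one Param type and one inference entry point; only the PoolType enumerator distinguishes AVG from MAX. Isolated usage, with the exact shapes and parameters from the test above:

// Values taken from Pool2DNodeSame above; PoolType is the only AVG/MAX difference.
Shape in_shape{10, 6, 12, 20}; // NHWC
Stride stride{3, 7};
Padding padding{PaddingType::SAME};

operation::Pool2D::Param pool_param{
    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto out_shape = onert::shape_inference::inferPoolShape(in_shape, pool_param);
// With SAME padding this comes out to {10, 2, 2, 20}, matching the assertions above.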
@@ -72,8 +74,9 @@ TEST(ShapeInference, Pool2DNodeValid)
  Stride stride{3, 7};
  Padding padding{PaddingType::VALID};

-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -81,8 +84,9 @@ TEST(ShapeInference, Pool2DNodeValid)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);

-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -98,8 +102,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
  Stride stride{3, 7};
  Padding padding{4, 3, 2, 1};

-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -107,8 +112,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);

-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -122,7 +128,8 @@ TEST(ShapeInference, Conv2D)
  Shape in_shape{10, 6, 12, 20};
  Shape ker_shape{30, 3, 6, 20};

-  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+                                 Dilation{1, 1}};
  auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -131,7 +138,8 @@ TEST(ShapeInference, Conv2D)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);

-  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+                                   Dilation{1, 1}};
  infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -140,7 +148,8 @@ TEST(ShapeInference, Conv2D)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);

-  param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+  param =
+      operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
  infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
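Conv2D::Param gains a fourth member, and every existing call site passes the identity Dilation{1, 1}, which reproduces the old behaviour. A usage sketch with a non-trivial dilation (the values are hypothetical; the API surface is the one exercised by the tests above):

// Hypothetical values; Dilation{1, 1} would give the pre-patch behaviour.
Shape in_shape{10, 6, 12, 20}; // NHWC input
Shape ker_shape{30, 3, 6, 20}; // OHWI kernel

operation::Conv2D::Param param{Stride{1, 1}, Padding{PaddingType::VALID}, Activation::NONE,
                               Dilation{2, 2}}; // dilated 3x6 kernel spans 5x11 input taps
auto out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
// With VALID padding and stride 1 this would yield {10, 2, 2, 30}.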