author    | Chunseok Lee <chunseok.lee@samsung.com> | 2020-12-14 14:43:43 +0900
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-12-14 14:43:43 +0900
commit    | 62529acabbafce7730601ed01d5709d7bc0d378a (patch)
tree      | bf6912cfa8fac4a2997292bfcb3c82055734c97e /runtime/onert
parent    | 6ea13af5257155ff993c205cf997b870cc627f73 (diff)
Imported Upstream version 1.12.0 (tag: upstream/1.12.0)
Diffstat (limited to 'runtime/onert')
228 files changed, 8292 insertions, 3566 deletions
diff --git a/runtime/onert/api/.clang-format b/runtime/onert/api/.clang-format
new file mode 120000
index 000000000..83185fee3
--- /dev/null
+++ b/runtime/onert/api/.clang-format
@@ -0,0 +1 @@
+../../../.clang-format.8
\ No newline at end of file
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 76380b4b8..6eb7e6ba9 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -64,7 +64,8 @@ typedef struct nnfw_session nnfw_session;
  *
  * The type of tensor represented in {@link nnfw_tensorinfo}
  */
-typedef enum {
+typedef enum
+{
   /** A tensor of 32 bit floating point */
   NNFW_TYPE_TENSOR_FLOAT32 = 0,
   /** A tensor of 32 bit signed integer */
@@ -96,7 +97,8 @@ typedef enum {
 /**
  * @brief Result values returned from a call to an API function
  */
-typedef enum {
+typedef enum
+{
   /** Successful */
   NNFW_STATUS_NO_ERROR = 0,
   /**
@@ -117,7 +119,8 @@ typedef enum {
 /**
  * @brief Data format of a tensor
  */
-typedef enum {
+typedef enum
+{
   /** Don't care layout */
   NNFW_LAYOUT_NONE = 0,
   /**
@@ -135,7 +138,8 @@ typedef enum {
 /**
  * @brief Information ID for retrieving information on nnfw (e.g. version)
  */
-typedef enum {
+typedef enum
+{
   /** nnfw runtime version
    * Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
    */
diff --git a/runtime/onert/api/include/nnfw_internal.h b/runtime/onert/api/include/nnfw_internal.h
index eb4b6d629..a88e32436 100644
--- a/runtime/onert/api/include/nnfw_internal.h
+++ b/runtime/onert/api/include/nnfw_internal.h
@@ -35,4 +35,13 @@ NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value,
  */
 NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
 
+/**
+ * @brief Load a tflite/circle model from file.
+ *
+ * @param[in] session   session
+ * @param[in] file_path Path to model file. Model type(tflite/circle) is decided by file extension
+ * @return    NFNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path);
+
 #endif // __NNFW_INTERNAL_H__
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 31c3890e3..28703c0eb 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01000b01
+#define NNFW_VERSION 0x01000c00
 
 #endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/CustomKernel.cc b/runtime/onert/api/src/CustomKernel.cc
index 3f3a5d81e..56525feff 100644
--- a/runtime/onert/api/src/CustomKernel.cc
+++ b/runtime/onert/api/src/CustomKernel.cc
@@ -65,7 +65,7 @@ public:
 };
 
 Kernel::Kernel(const nnfw_custom_eval evalFunction)
-    : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
+  : _in_params(), _userdata(nullptr), _userdata_size(0), _evalFunction(evalFunction)
 {
 }
 
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index 835b2078a..4eba4ecec 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -90,7 +90,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session)
 NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *pacakge_file_path)
 {
   NNFW_RETURN_ERROR_IF_NULL(session);
-  return session->load_model_from_file(pacakge_file_path);
+  return session->load_model_from_nnpackage(pacakge_file_path);
 }
 
 /*
@@ -350,6 +350,12 @@
   return session->load_circle_from_buffer(buffer, size);
 }
 
+NNFW_STATUS nnfw_load_model_from_modelfile(nnfw_session *session, const char *file_path)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->load_model_from_modelfile(file_path);
+}
+
 NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
 {
   NNFW_RETURN_ERROR_IF_NULL(session);
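The NNFW_VERSION bump above follows the header's own 0xMMmmmmPP encoding: 0x01000c00 is 1.12.0 (0x01 = 1, 0x000c = 12, 0x00 = 0), up from 0x01000b01, i.e. 1.11.1. The new internal entry point added to nnfw_api.cc can be driven as in the sketch below; it relies only on the public session calls from nnfw.h, and the file name and error-handling shape are illustrative, not part of this commit.

/* Hedged usage sketch of the new internal API (not code from this commit). */
#include "nnfw.h"
#include "nnfw_internal.h"

int load_single_model(const char *path) /* e.g. "model.tflite" or "model.circle" */
{
  nnfw_session *session = NULL;
  if (nnfw_create_session(&session) != NNFW_STATUS_NO_ERROR)
    return -1;
  /* Unlike nnfw_load_model_from_file, which takes an nnpackage directory,
     this takes a bare model file; the loader is chosen from the
     ".tflite"/".circle" suffix, as load_model_from_modelfile shows below. */
  if (nnfw_load_model_from_modelfile(session, path) != NNFW_STATUS_NO_ERROR ||
      nnfw_prepare(session) != NNFW_STATUS_NO_ERROR)
  {
    nnfw_close_session(session);
    return -1;
  }
  /* ... set inputs/outputs and nnfw_run(session) ... */
  return nnfw_close_session(session) == NNFW_STATUS_NO_ERROR ? 0 : -1;
}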
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index a4c69eb4f..c3fdb131b 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -19,17 +19,19 @@
 #include "compiler/Compiler.h"
 #include "util/ConfigSource.h"
 #include "util/Exceptions.h"
+#include "util/logging.h"
 #include "exec/Execution.h"
 #include "circle_loader.h"
 #include "tflite_loader.h"
 #include "json/json.h"
 #include "ir/OpCode.h"
+#include "util/TracingCtx.h"
+
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
 #include <dirent.h>
-#include <util/ConfigSource.h>
 #include <misc/string_helpers.h>
 
 /*
@@ -40,8 +42,11 @@
 #define MAX_PATH_LENGTH 1024
 #define MAX_TENSOR_NAME_LENGTH 64
 
+namespace
+{
+
 // Is null-terminating in length ?
-static bool null_terminating(const char *str, uint32_t length)
+bool null_terminating(const char *str, uint32_t length)
 {
   for (uint32_t i = 0; i < length; i++)
   {
@@ -53,7 +58,7 @@ static bool null_terminating(const char *str, uint32_t length)
   return false;
 }
 
-static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
+onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
 {
   if (layout == NNFW_LAYOUT_CHANNELS_LAST)
   {
@@ -92,9 +97,70 @@ NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensor
   }
 }
 
+std::string trim(const std::string &value)
+{
+  std::string whitespace = " \t";
+  auto begin = value.find_first_not_of(whitespace);
+  if (begin == std::string::npos)
+    return ""; // no content
+
+  auto end = value.find_last_not_of(whitespace);
+  auto range = end - begin + 1;
+  return value.substr(begin, range);
+}
+
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+{
+  std::ifstream ifs(cfgfile);
+  if (ifs.is_open())
+  {
+    std::string line;
+    while (std::getline(ifs, line))
+    {
+      auto cmtpos = line.find('#');
+      if (cmtpos != std::string::npos)
+      {
+        line = line.substr(0, cmtpos);
+      }
+      std::istringstream isline(line);
+      std::string key;
+      if (std::getline(isline, key, '='))
+      {
+        std::string value;
+        if (std::getline(isline, value))
+        {
+          key = trim(key);
+          keyValues[key] = trim(value);
+        }
+      }
+    }
+    ifs.close();
+    return true;
+  }
+  return false;
+}
+
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+  auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
+
+  for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+  {
+    VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+    configsrc->set(it->first, it->second);
+  }
+
+  onert::util::config_source_ext(std::move(configsrc));
+}
+
+} // namespace
+
 nnfw_session::nnfw_session()
-  : _subgraphs{nullptr}, _execution{nullptr},
-    _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
+  : _subgraphs{nullptr}, _execution{nullptr},
+    _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}, _tracing_ctx{
+                                                                                     nullptr}
 {
   // DO NOTHING
 }
@@ -122,13 +188,65 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
     return NNFW_STATUS_ERROR;
   }
 
-  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+  _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
+
+  _state = State::MODEL_LOADED;
+  return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
+{
+  if (!isStateInitialized())
+    return NNFW_STATUS_INVALID_STATE;
+
+  if (!model_file_path)
+  {
+    std::cerr << "Model file path is null." << std::endl;
+    return NNFW_STATUS_UNEXPECTED_NULL;
+  }
+
+  std::string filename{model_file_path};
+  if (filename.size() < 8) // .tflite or .circle
+  {
+    std::cerr << "Invalid model file path." << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  std::string model_type = filename.substr(filename.size() - 7, 7);
+
+  try
+  {
+    if (model_type == ".tflite")
+    {
+      _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
+    }
+    else if (model_type == ".circle")
+    {
+      _subgraphs = onert::circle_loader::loadModel(filename.c_str());
+    }
+    else
+    {
+      std::cerr << "Unsupported model type" << std::endl;
+      return NNFW_STATUS_ERROR;
+    }
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "Error during model loading : " << e.what() << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
 
   _state = State::MODEL_LOADED;
   return NNFW_STATUS_NO_ERROR;
 }
 
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
 {
   if (!isStateInitialized())
     return NNFW_STATUS_INVALID_STATE;
@@ -166,6 +284,18 @@ NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
   mfs >> root;
   const Json::Value &models = root["models"];
   const Json::Value &model_types = root["model-types"];
+  const Json::Value &configs = root["configs"];
+
+  if (!configs.empty() && !configs[0].empty())
+  {
+    auto filepath = package_dir + std::string("/metadata/") + configs[0].asCString();
+
+    CfgKeyValues keyValues;
+    if (loadConfigure(filepath, keyValues))
+    {
+      setConfigKeyValues(keyValues);
+    }
+  }
 
   auto model_file_path = package_dir + std::string("/") + models[0].asString(); // first model
   auto model_type = model_types[0].asString(); // first model's type
@@ -190,7 +320,9 @@
     return NNFW_STATUS_ERROR;
   }
 
-  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+  _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
+
+  _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
 
   _state = State::MODEL_LOADED;
   return NNFW_STATUS_NO_ERROR;
@@ -225,7 +357,7 @@ NNFW_STATUS nnfw_session::prepare()
   {
     _subgraphs.reset();
     std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
-    _execution = std::make_shared<onert::exec::Execution>(executors);
+    _execution = std::make_unique<onert::exec::Execution>(executors);
   }
   catch (const std::exception &e)
   {
@@ -308,8 +440,8 @@ NNFW_STATUS nnfw_session::set_input(uint32_t index, NNFW_TYPE /*type*/, const vo
   if (!buffer && length != 0)
   {
     std::cerr
-        << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
-        << std::endl;
+      << "Error during nnfw_session::set_input : given buffer is NULL but the length is not 0"
+      << std::endl;
     return NNFW_STATUS_ERROR;
   }
 
@@ -337,8 +469,8 @@ NNFW_STATUS nnfw_session::set_output(uint32_t index, NNFW_TYPE /*type*/, void *b
   if (!buffer && length != 0)
   {
     std::cerr
-        << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
-        << std::endl;
+      << "Error during nnfw_session::set_output : given buffer is NULL but the length is not 0"
+      << std::endl;
     return NNFW_STATUS_ERROR;
   }
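The "configs" handling above means an nnpackage can now ship runtime settings next to the model: the MANIFEST's "configs" array names a file under the package's metadata/ directory, loadConfigure() drops everything after a '#', splits each remaining line at the first '=', trims spaces and tabs from both sides, and setConfigKeyValues() feeds the pairs into a GeneralConfigSource. A hypothetical metadata config illustrating the accepted shape (the file name and the BACKENDS line are assumptions; EXECUTOR is a key this same commit reads via util::config::EXECUTOR):

# metadata/config.cfg (hypothetical name, listed as "configs" : [ "config.cfg" ] in MANIFEST)
EXECUTOR=Linear       # read elsewhere in this commit to pick the linear memory planner
BACKENDS=acl_cl;cpu   # assumed key and value syntax, shown only for shape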
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 604ba38b4..a50ac72d3 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -21,6 +21,7 @@
 #include "nnfw_experimental.h"
 
 #include <util/GeneralConfigSource.h>
+#include <util/TracingCtx.h>
 
 #include <string>
 #include <memory>
@@ -100,7 +101,7 @@ public:
   nnfw_session();
   ~nnfw_session();
 
-  NNFW_STATUS load_model_from_file(const char *package_file_path);
+  NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
 
   NNFW_STATUS prepare();
   NNFW_STATUS run();
@@ -132,6 +133,7 @@ public:
   NNFW_STATUS set_config(const char *key, const char *value);
   NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
   NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+  NNFW_STATUS load_model_from_modelfile(const char *file_path);
 
   //
   // Experimental API
@@ -154,8 +156,10 @@ private:
   State _state{State::INITIALIZED};
   std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
   std::unique_ptr<onert::compiler::Compiler> _compiler;
-  std::shared_ptr<onert::exec::Execution> _execution;
+  std::unique_ptr<onert::exec::Execution> _execution;
   std::shared_ptr<onert::frontend::custom::KernelRegistry> _kernel_registry;
+
+  std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
 };
 
 #endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt
index 42d622aa8..dc038c975 100644
--- a/runtime/onert/backend/CMakeLists.txt
+++ b/runtime/onert/backend/CMakeLists.txt
@@ -4,3 +4,5 @@ add_subdirectory(cpu)
 add_subdirectory(acl_cl)
 add_subdirectory(acl_neon)
 add_subdirectory(acl_common)
+add_subdirectory(ruy)
+add_subdirectory(xnnpack)
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 5c5041378..4f48314c1 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <backend/Backend.h>
 
+#include "BackendContext.h"
 #include "Config.h"
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
@@ -41,21 +42,20 @@ public:
 
   std::shared_ptr<IConfig> config() const override { return _config; }
 
-  std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
-                                             const std::shared_ptr<custom::IKernelBuilder> &,
-                                             bool is_linear_executor) const override
+  std::unique_ptr<backend::BackendContext>
+  newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &,
+             bool is_linear_executor) const override
   {
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
-    auto context = std::make_unique<BackendContext>(this, &graph);
+    auto context = std::make_unique<acl_cl::BackendContext>(this, &graph);
     auto tm = createTensorManager(is_linear_executor);
     auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
-    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
     context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
-    context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
   }
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
new file mode 100644
index 000000000..a6f228a4f
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "Optimizer.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+void BackendContext::initConsts()
+{
+  for (auto &op : operation_list())
+  {
+    constant_initializer->setLayout(op.layout);
+    graph()->operations().at(op.index).accept(*constant_initializer);
+  }
+
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    if (obj.isConstant() && !constant_initializer->exist(ind))
+    {
+      constant_initializer->registerDefaultInitializer(ind, obj);
+    }
+  }
+
+  constant_initializer->run();
+}
+
+void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                 const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+  ir::OperandIndexMap<uint32_t> uses_map;
+  ir::OperandIndexMap<uint32_t> def_map;
+  ir::OperandIndexSequence constants;
+
+  // Prepare scanning
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    const auto &li = lower_info.operand.at(ind);
+    if (li->def_factors().getOnlyElement().backend() != backend())
+      continue;
+
+    // Ignore unused tensor
+    if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+    {
+      VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process."
+                           << std::endl;
+      return;
+    }
+
+    uses_map[ind] = obj.getUses().size();
+    def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+    if (obj.isConstant())
+      constants.append(ind);
+
+    auto factor = li->def_factors().getOnlyElement();
+    if (!tensor_builder->isRegistered(ind))
+    {
+      // These tensors do not exist in any op_seq (No use and def)
+      const auto info = obj.info();
+      const auto backend_layout = factor.layout();
+      // TODO Change tensor info to have permuted shape
+      tensor_builder->registerTensorInfo(ind, info, backend_layout);
+    }
+  }
+
+  // Start scanning to do notify{First|Last}Use for each tensor
+
+  // If a tensor is a constant, increase the use of the tensor and allocate it first.
+  // Increasing use count here makes the tensor never be deallocated, i.e it they will be
+  // deallocated last.
+  VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+  for (const auto &ind : constants)
+  {
+    uses_map[ind]++;
+    tensor_builder->notifyFirstUse(ind);
+  }
+
+  // At each operation,
+  // 1. Scan DEF of outputs. If the DEF, allocate it
+  // 2. Scan DEF of inputs. If variable tensor, allocate it
+  // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+  for (const auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    for (const auto &op_idx : op_seq.operations())
+    {
+      auto &op = graph()->operations().at(op_idx);
+      auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+      auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+      // Define outputs
+      for (const auto &ind : op_outputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(def_map.find(ind) != def_map.end());
+        if (def_map[ind])
+        {
+          def_map[ind] = 0;
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      // Scan variable tensors
+      // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+      // non-constant because of less memory usage by memory planning in here
+      for (const auto &ind : op_inputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        const auto &operand = graph()->operands().at(ind);
+        if (operand.info().isVariable())
+        {
+          // The variable tensor with buffer is not supported yet
+          assert(operand.data() == nullptr);
+          assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+          assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+                 lower_info.operand.at(ind)->use_factors().size() == 1);
+          assert(uses_map[ind] == 1 && def_map[ind] == 0);
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      for (const auto &ind : op_inputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(uses_map.find(ind) != uses_map.end());
+        assert(uses_map[ind] > 0);
+        uses_map[ind]--;
+        if (uses_map[ind] == 0)
+        {
+          // plan for deallocation of static tensornode
+          tensor_builder->notifyLastUse(ind);
+        }
+      }
+    }
+  }
+
+  // Dispose and validate
+  for (const auto &ind : constants)
+  {
+    --uses_map[ind];
+    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+    {
+      tensor_builder->notifyLastUse(ind);
+    }
+  }
+
+  assert(
+    std::all_of(uses_map.begin(), uses_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+  assert(
+    std::all_of(def_map.begin(), def_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                            const ir::OpSequences &op_seqs,
+                                            const ir::LowerInfoMap &lower_info)
+{
+  optimizer->optimize();
+
+  for (const auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+                    ir::Remove::DUPLICATED;
+    for (const auto op_ind : op_seq)
+    {
+      bool op_assigned = [&]() {
+        for (auto &op_info : operation_list())
+          if (op_info.index == op_ind)
+            return true;
+        return false;
+      }();
+      if (!op_assigned)
+        continue;
+
+      const auto &op = graph()->operations().at(op_ind);
+      for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED)
+      {
+        if (!tensor_builder->isRegistered(index) && !model_io.contains(index) &&
+            find(operand_list().begin(), operand_list().end(), index) != operand_list().end())
+        {
+          const auto &operand_lower_info =
+            lower_info.operand.at(index)->def_factors().getOnlyElement();
+
+          // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl)
+          // op.getOutputs() of permute (CPU) returns tensor A
+          // but tensor A belongs to the backend of acl_cl.
+          // So, we have to make this tensor NOT registered for CPU.
+          if (operand_lower_info.backend() != backend())
+            continue;
+
+          const auto &obj = graph()->operands().at(index);
+          const auto frontend_layout = op_seq.getLayout();
+          const auto backend_layout = operand_lower_info.layout();
+          ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                       obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+          tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+        }
+      }
+    }
+  }
+
+  // TODO Get compiler options from compiler, and use it rather than getting it from Env
+  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+  {
+    planTensors(order, op_seqs, lower_info);
+  }
+  else
+  {
+    // For the executors that does not have fixed linear execution order:
+    // To make tensors never be deallocated, this is a workaround to use static memory planner
+    for (auto ind : operand_list())
+    {
+      if (tensor_builder->isRegistered(ind))
+        tensor_builder->notifyFirstUse(ind);
+    }
+  }
+
+  tensor_builder->prepare();
+
+  return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                       const ir::OpSequences &op_seqs)
+{
+  FunctionMap ret;
+
+  for (auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    bool assigned = [&]() {
+      for (auto op_info : operation_list())
+        if (op_seq.exist(op_info.index))
+          return true;
+      return false;
+    }();
+    if (!assigned)
+      continue;
+    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+    ret.emplace_back(op_seq_ind, std::move(fn_seq));
+  }
+
+  tensor_builder->allocate();
+  initConsts();
+
+  // NOTE For memory optimization, we want to free some operand data
+  for (auto ind : operand_list())
+  {
+    // TODO Remove const_cast
+    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+    obj.releaseData();
+  }
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) {
+      ifunc.prepare();
+      tensor_builder->postFunctionPrepare();
+    });
+  }
+
+  return ret;
+}
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
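planTensors() above (duplicated for acl_neon further down) is a reference-counting liveness plan: every operand starts at its total consumer count, a DEF claims the buffer (notifyFirstUse), each consuming operation decrements the count, and the buffer is released (notifyLastUse) when it hits zero; constants get one extra use up front so they are only released at the very end. A minimal stand-alone C++ sketch of the same idea follows; the Op/plan names are hypothetical and this is not onert code. As in the backend code, only the Linear executor runs such a plan; other executors just notifyFirstUse everything so nothing is freed mid-run.

// Stand-alone sketch of use/def-count liveness planning (hypothetical types).
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Op
{
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

// 'uses' holds the total number of consumers per operand, like uses_map above.
void plan(const std::vector<Op> &order, std::map<std::string, int> uses,
          const std::vector<std::string> &constants)
{
  for (const auto &c : constants)
  {
    ++uses[c]; // pin constants: their counter cannot reach zero mid-graph
    std::printf("alloc %s\n", c.c_str());
  }
  for (const auto &op : order)
  {
    for (const auto &out : op.outputs)
      std::printf("alloc %s\n", out.c_str()); // DEF of an output claims memory
    for (const auto &in : op.inputs)
      if (--uses[in] == 0)
        std::printf("free  %s\n", in.c_str()); // last USE releases it
  }
  for (const auto &c : constants)
    if (--uses[c] == 0)
      std::printf("free  %s\n", c.c_str()); // unpin constants at the very end
}

int main()
{
  // conv consumes input+weights, relu consumes conv_out: weights (a constant)
  // and conv_out are freed as soon as their last consumer has run.
  plan({{{"input", "weights"}, {"conv_out"}}, {{"conv_out"}, {"relu_out"}}},
       {{"input", 1}, {"weights", 1}, {"conv_out", 1}}, {"weights"});
}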
diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h
new file mode 100644
index 000000000..662d767d0
--- /dev/null
+++ b/runtime/onert/backend/acl_cl/BackendContext.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_cl
+{
+
+class Optimizer;
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+  BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+                 std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::BackendContext(backend, graph, tensor_registry),
+      tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+      kernel_gen{kernel_gen}
+  {
+  }
+
+  ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                              const ir::OpSequences &op_seqs,
+                              const ir::LowerInfoMap &lower_info) override;
+  FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+                         const ir::OpSequences &op_seqs) override;
+
+private:
+  void initConsts();
+  void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                   const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+  std::shared_ptr<TensorBuilder> tensor_builder;
+  std::shared_ptr<ConstantInitializer> constant_initializer;
+  std::shared_ptr<KernelGenerator> kernel_gen;
+  std::shared_ptr<Optimizer> optimizer;
+};
+
+} // namespace acl_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index b45b91058..413a7ccc3 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -112,7 +112,7 @@ void ConstantInitializer::visit(const ir::operation::Reverse &node)
   const auto &axis_obj = _operands.at(axis_index);
 
   const auto ifm_rank = input_obj.shape().rank();
-  const auto frontend_layout = this->_current_op_seq_layout;
+  const auto frontend_layout = this->_current_layout;
 
   auto output_tensor = this->_tensor_reg->getITensor(output_index);
   const auto backend_layout = output_tensor->layout();
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index 9f3acb461..fc0eca84f 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#ifndef __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
 
 #include "AclConstantInitializer.h"
 
@@ -45,4 +45,4 @@ public:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
+#endif // __ONERT_BACKEND_ACL_CL_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index e7690af2e..3a5ea5a0f 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -49,7 +49,7 @@ KernelGenerator::KernelGenerator(
   const std::shared_ptr<TensorBuilder> &tensor_builder,
   const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
   : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
-    _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
+    _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN)
 {
   // DO NOTHING
 }
@@ -62,7 +62,7 @@
   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
   _return_fn_seq->enableDynamicShapeInferer(false);
 
-  _current_op_seq_layout = op_seq.getLayout();
+  _current_layout = op_seq.getLayout();
   for (const auto &operation_idx : op_seq.operations())
   {
     const auto &node = _operations_ctx.at(operation_idx);
@@ -78,6 +78,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto block_size_index{
     node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
 
+  const auto NNApiInputs = 2;
+  if (node.getInputs().size() != NNApiInputs)
+  {
+    const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
+    if (!_ctx.at(crops_index).isConstant())
+    {
+      throw std::runtime_error("Non-constant crops NYI for acl_cl backend BatchToSpaceND");
+    }
+
+    auto crops = _ctx.at(crops_index).asVector<int32_t>();
+    for (auto crop : crops)
+    {
+      if (crop != 0)
+      {
+        throw std::runtime_error("Non-zero crops NYI for acl_cl backend BatchToSpaceND");
+      }
+    }
+  }
+
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
   auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
   auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index);
@@ -152,8 +171,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
 
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
   // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -189,8 +208,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
 
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
   // Kernel format is [1, kernel_height, kernel_width, depth_out].
   const auto &ker_shape = _ctx.at(ker_index).shape();
   const auto ker_height = ker_shape.dim(1);
@@ -255,7 +274,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   else
   {
     const auto rank = _ctx.at(ofm_index).shape().rank();
-    const auto frontend_layout = _current_op_seq_layout;
+    const auto frontend_layout = _current_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
       acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
@@ -277,7 +296,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
   auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                                 ::arm_compute::CLFullyConnectedReshapingLayer>(
-    node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
+    node, _ctx, _tensor_builder, _tensor_reg, _current_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
     std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
@@ -296,7 +315,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   // Convert to ACL axes taking into account negative values and possible duplicates.
   const auto &axes = _ctx.at(axes_index);
   const auto input_rank = _ctx.at(input_index).shape().rank();
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = input_tensor->layout();
 
   std::unique_ptr<arm_compute::IFunction> fn;
@@ -329,7 +348,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   // NOTE This operation must not be changed the layout from frontend to backend
   // So, PermutationOperationPass makes layouts of frontend and backend the same.
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = output_tensor->layout();
   assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
          frontend_layout == backend_layout);
@@ -388,7 +407,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
   auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = inputData_tensor->layout();
 
   // Set initializers for indices data such as order of inputData
@@ -455,7 +474,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
   auto outputData_tensor = _tensor_reg->getAclTensor(output_index);
   auto inputData_tensor = _tensor_reg->getAclTensor(input_index);
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = inputData_tensor->layout();
 
   // Set initializers for indices data such as order of inputData
@@ -557,7 +576,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx);
   auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx);
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = ifm_tensor->layout();
 
   const auto &perms = _ctx.at(perm_idx);
@@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
   auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx);
 
   const size_t output_rank = _ctx.at(output_idx).shape().rank();
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = output_tensor->layout();
   int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis;
   axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
@@ -887,7 +906,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
   for (const auto &input_index : input_indexes)
     inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout();
 
   if (axis < 0)
@@ -923,8 +942,7 @@
 void KernelGenerator::visit(const ir::operation::Pool2D &node)
 {
   auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-    node, _ctx, _tensor_reg, _current_op_seq_layout,
-    acl_common::convertPoolType(node.param().op_type));
+    node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type));
 
   const auto ofm_index{node.getOutputs().at(0)};
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
@@ -1169,9 +1187,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
   const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
   const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
 
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout);
 
   const auto stride = node.param().stride;
 
@@ -1270,7 +1288,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   UNUSED_RELEASE(backend_layout);
   assert(backend_layout == ifm_tensor->layout());
   assert(backend_layout == indices_tensor->layout());
-  assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
+  assert(ifm_rank < 4 || _current_layout == backend_layout);
 
   // input is n-D, indices k-D, output is (n + k - 1)-D
   size_t n = ifm_rank;
@@ -1306,11 +1324,11 @@
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
+void KernelGenerator::visit(const ir::operation::ArgMinMax &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
-  const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)};
+  const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
+  const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
 
   auto ifm_shape = _ctx.at(ifm_index).shape();
   auto ofm_shape = _ctx.at(ofm_index).shape();
@@ -1320,7 +1338,7 @@
   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index);
   auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index);
   const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
-  auto frontend_layout = _current_op_seq_layout;
+  auto frontend_layout = _current_layout;
   auto backend_layout = ifm_tensor->layout();
 
   int axis_value = _ctx.at(axis_index).asScalar<int32_t>();
@@ -1331,10 +1349,10 @@
   auto acl_axis =
     acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
-
+  auto reduce_type = node.param().is_arg_max ? ::arm_compute::ReductionOperation::ARG_IDX_MAX
+                                             : ::arm_compute::ReductionOperation::ARG_IDX_MIN;
   auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayerEx>(
-    ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
-    ::arm_compute::ReductionOperation::ARG_IDX_MAX);
+    ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), reduce_type);
 
   _return_fn = asAclFunction(std::move(fn));
 }
@@ -1400,7 +1418,7 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   for (const auto &ofm_ind : output_indexes)
     output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle());
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = ifm_tensor->layout();
   auto axis = _ctx.at(axis_index).asScalar<int32_t>();
   if (axis < 0)
@@ -1439,7 +1457,7 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
   {
     int32_t split_dim = split_dim_op.asScalar<int32_t>();
     uint32_t split_dim_revised = (split_dim < 0) ? (split_dim + ifm_rank) : split_dim;
-    const auto frontend_layout = _current_op_seq_layout;
+    const auto frontend_layout = _current_layout;
     const auto backend_layout = ifm_tensor->layout();
 
     if (ifm_tensor->num_dimensions() != ifm_tensor->info()->num_dimensions())
@@ -1483,7 +1501,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : output_indexes)
     outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
   if (axis < 0)
     axis += input_rank;
@@ -1526,7 +1544,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   auto input = _tensor_reg->getAclTensor(input_index)->handle();
   auto output = _tensor_reg->getAclTensor(output_index)->handle();
 
-  const auto frontend_layout = _current_op_seq_layout;
+  const auto frontend_layout = _current_layout;
   const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout();
 
   ::arm_compute::PaddingList padding_list;
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index e8a922677..22a7c18a3 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
 #define __ONERT_BACKEND_ACL_CL_KERNEL_GENERATOR_H__
 
-#include <backend/IKernelGenerator.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
 
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
@@ -31,7 +31,7 @@ namespace backend
 namespace acl_cl
 {
 
-class KernelGenerator : public IKernelGenerator
+class KernelGenerator : public cpu_common::KernelGeneratorBase
 {
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
@@ -39,60 +39,61 @@ public:
                   const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
 
   void visit(const ir::OpSequence &) override;
+
+  void visit(const ir::operation::ArgMinMax &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
   void visit(const ir::operation::BinaryArithmetic &) override;
+  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::Concat &) override;
   void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::ConvertFp16ToFp32 &) override;
+  void visit(const ir::operation::ConvertFp32ToFp16 &) override;
+  void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Concat &) override;
-  void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::Reshape &) override;
-  void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Softmax &) override;
-  void visit(const ir::operation::Slice &) override;
-  void visit(const ir::operation::StridedSlice &) override;
-  void visit(const ir::operation::Transpose &) override;
   void visit(const ir::operation::ElementwiseActivation &) override;
   void visit(const ir::operation::ElementwiseBinary &) override;
   void visit(const ir::operation::ElementwiseUnary &) override;
+  void visit(const ir::operation::EmbeddingLookup &) override;
   void visit(const ir::operation::ExpandDims &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::Gather &) override;
+  void visit(const ir::operation::HashtableLookup &) override;
   void visit(const ir::operation::InstanceNorm &) override;
-  void visit(const ir::operation::Comparison &) override;
+  void visit(const ir::operation::L2Normalization &) override;
+  void visit(const ir::operation::LocalResponseNormalization &) override;
   void visit(const ir::operation::LSTM &) override;
   void visit(const ir::operation::OneHot &) override;
   void visit(const ir::operation::Pack &) override;
-  void visit(const ir::operation::Pool2D &) override;
+  void visit(const ir::operation::Pad &) override;
   void visit(const ir::operation::Permute &) override;
+  void visit(const ir::operation::Pool2D &) override;
+  void visit(const ir::operation::PReLU &) override;
+  void visit(const ir::operation::Reduce &) override;
+  void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::ResizeBilinear &) override;
   void visit(const ir::operation::ResizeNearestNeighbor &) override;
+  void visit(const ir::operation::Reverse &) override;
   void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::Slice &) override;
+  void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
-  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::HashtableLookup &) override;
-  void visit(const ir::operation::PReLU &) override;
-  void visit(const ir::operation::TransposeConv &) override;
-  void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::TopKV2 &) override;
-  void visit(const ir::operation::Gather &) override;
-  void visit(const ir::operation::ArgMax &) override;
-  void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::Split &) override;
   void visit(const ir::operation::SplitV &) override;
+  void visit(const ir::operation::SquaredDifference &) override;
+  void visit(const ir::operation::Squeeze &) override;
+  void visit(const ir::operation::StridedSlice &) override;
+  void visit(const ir::operation::TopKV2 &) override;
+  void visit(const ir::operation::Transpose &) override;
+  void visit(const ir::operation::TransposeConv &) override;
   void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Pad &) override;
-  void visit(const ir::operation::ConvertFp32ToFp16 &) override;
-  void visit(const ir::operation::ConvertFp16ToFp32 &) override;
-  void visit(const ir::operation::Reverse &) override;
 
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
   std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
-  ir::Layout _current_op_seq_layout;
+  ir::Layout _current_layout;
 };
 
 } // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/Optimizer.h b/runtime/onert/backend/acl_cl/Optimizer.h
index 18d38ec1b..ad5154860 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.h
+++ b/runtime/onert/backend/acl_cl/Optimizer.h
@@ -17,8 +17,7 @@
 #ifndef __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
 #define __ONERT_BACKEND_ACL_CL_OPTIMIZER_H__
 
-#include <backend/IOptimizer.h>
-#include <backend/BackendContext.h>
+#include "BackendContext.h"
 #include "TensorBuilder.h"
 
 namespace onert
@@ -28,12 +27,12 @@ namespace backend
 namespace acl_cl
 {
 
-class Optimizer : public IOptimizer
+class Optimizer
 {
 public:
   Optimizer(BackendContext *context);
 
-  void optimize() override;
+  void optimize();
 
 private:
   BackendContext *_context;
diff --git a/runtime/onert/backend/acl_cl/acl_cl.cc b/runtime/onert/backend/acl_cl/acl_cl.cc
index 88378b13a..82cbde02f 100644
--- a/runtime/onert/backend/acl_cl/acl_cl.cc
+++ b/runtime/onert/backend/acl_cl/acl_cl.cc
@@ -14,20 +14,11 @@
  * limitations under the License.
  */
 
-#include <util/logging.h>
-
 #include "Backend.h"
 
 extern "C" {
-onert::backend::Backend *onert_backend_create()
-{
-  VERBOSE(onert_backend_create) << "'acl_cl' loaded\n";
-  return new onert::backend::acl_cl::Backend;
-}
-void onert_backend_destroy(onert::backend::Backend *backend)
-{
-  VERBOSE(onert_backend_create) << "'acl_cl' unloaded\n";
-  delete backend;
-}
+onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_cl::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
 }
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
index 21f41a3e6..921d107d9 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.cc
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -25,7 +25,7 @@ namespace acl_common
 
 AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
                                                const std::shared_ptr<ITensorRegistry> &tensor_reg)
-  : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+  : cpu_common::ConstantInitializerBase{operands}, _tensor_reg{tensor_reg}
 {
   // DO NOTHING
 }
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
index 52f4c54cf..894e2e7d1 100644
--- a/runtime/onert/backend/acl_common/AclConstantInitializer.h
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
 
-#include <backend/IConstantInitializer.h>
+#include <backend/cpu_common/ConstantInitializerBase.h>
 #include <ir/Operands.h>
 #include "AclTensorRegistry.h"
 
@@ -28,7 +28,7 @@ namespace backend
 namespace acl_common
 {
 
-class AclConstantInitializer : public IConstantInitializer
+class AclConstantInitializer : public cpu_common::ConstantInitializerBase
 {
 public:
   AclConstantInitializer(const ir::Operands &operands,
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index bb7abc95d..12e9ab894 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -21,7 +21,6 @@
 #include <queue>
 
 #include <arm_compute/core/Types.h>
-#include <backend/ITensorBuilder.h>
 #include "ir/OperandIndexMap.h"
 #include <ir/Operands.h>
 #include "AclTensorManager.h"
@@ -43,14 +42,12 @@ enum class UsesType
   LAST
 };
 
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-class AclTensorBuilder : public ITensorBuilder
+template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
 {
 public:
   using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
 
-  AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
-                   const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
+  AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
 
   /**
    * @brief Register tensor information to allocate on ACL-CL backend
@@ -59,16 +56,16 @@ public:
    * @param[in] layout Tensor data layout
    */
   void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                          ir::Layout backend_layout) override;
+                          ir::Layout backend_layout);
 
-  void notifyFirstUse(const ir::OperandIndex &) override;
-  void notifyLastUse(const ir::OperandIndex &) override;
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
 
-  bool isRegistered(const ir::OperandIndex &) const override;
+  bool isRegistered(const ir::OperandIndex &) const;
 
-  void prepare(void) override;
-  void allocate() override;
-  void postFunctionPrepare() override;
+  void prepare(void);
+  void allocate();
+  void postFunctionPrepare();
 
   T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
 
@@ -105,7 +102,6 @@ private:
   ir::OperandIndexMap<size_t> _uses_count_map;
 
   std::unique_ptr<T_AclTensorManager> _tensor_mgr;
-  std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
 
   // for linear executor
   std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -133,10 +129,9 @@ namespace acl_common
 {
 
 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
-  const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
-  const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
-  : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
+                                                                     T_AclTensorManager *tensor_mgr)
+  : _operands{operands}, _tensor_mgr{tensor_mgr}
 {
   assert(_tensor_mgr);
 }
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index 67d9d7176..7d3a69032 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -109,13 +109,19 @@ namespace acl_common
     case ir::DataType::UINT8:
       return ::arm_compute::DataType::U8;
     case ir::DataType::QUANT_INT8_SYMM:
-      return ::arm_compute::DataType::S8;
+      return ::arm_compute::DataType::QSYMM8;
+    case ir::DataType::QUANT_INT8_ASYMM:
+      return ::arm_compute::DataType::QASYMM8_SIGNED;
     case ir::DataType::FLOAT16:
      return ::arm_compute::DataType::F16;
    case ir::DataType::INT64:
      return ::arm_compute::DataType::S64;
::arm_compute::DataType::S64; + case ir::DataType::QUANT_INT16_ASYMM: + return ::arm_compute::DataType::QASYMM16; + case ir::DataType::QUANT_INT8_SYMM_PER_CHANNEL: + return ::arm_compute::DataType::QSYMM8_PER_CHANNEL; default: - throw std::runtime_error("Not supported, yet"); + throw std::runtime_error("Not supported internal data type, yet"); break; } } @@ -175,7 +181,7 @@ namespace acl_common return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.0f, 0.0f}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal activation, yet"}; break; } } @@ -219,7 +225,7 @@ asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, return ::arm_compute::ActivationLayerInfo{ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported internal elementwise activation, yet"}; break; } } @@ -295,6 +301,8 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) return ir::DataType::UINT32; case ::arm_compute::DataType::QASYMM8: return ir::DataType::QUANT_UINT8_ASYMM; + case ::arm_compute::DataType::QASYMM8_SIGNED: + return ir::DataType::QUANT_INT8_ASYMM; case ::arm_compute::DataType::U8: return ir::DataType::UINT8; case ::arm_compute::DataType::QSYMM8: @@ -304,7 +312,7 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) case ::arm_compute::DataType::S64: return ir::DataType::INT64; default: - throw std::runtime_error{"Not supported, yet"}; + throw std::runtime_error{"Not supported acl data type, yet"}; break; } } diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index 35d6e4e8e..b11c19733 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -21,6 +21,7 @@ #include <backend/Backend.h> #include <ir/Operands.h> +#include "BackendContext.h" #include "Config.h" #include "ConstantInitializer.h" #include "KernelGenerator.h" @@ -41,21 +42,20 @@ public: std::shared_ptr<IConfig> config() const override { return _config; } - std::unique_ptr<BackendContext> newContext(const ir::Graph &graph, - const std::shared_ptr<custom::IKernelBuilder> &, - bool is_linear_executor) const override + std::unique_ptr<backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &, + bool is_linear_executor) const override { const auto &operands = graph.operands(); const auto &operations = graph.operations(); - auto context = std::make_unique<BackendContext>(this, &graph); + auto context = std::make_unique<acl_neon::BackendContext>(this, &graph); auto tm = createTensorManager(is_linear_executor); auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); - auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + auto tb = std::make_shared<TensorBuilder>(operands, tm); context->tensor_registry = tr; context->tensor_builder = tb; context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); - context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; } diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc new file mode 100644 index 000000000..8b53171f7 --- /dev/null +++ 
b/runtime/onert/backend/acl_neon/BackendContext.cc @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "Optimizer.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +void BackendContext::planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info) +{ + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + const auto &li = lower_info.operand.at(ind); + if (li->def_factors().getOnlyElement().backend() != backend()) + continue; + + // Ignore unused tensor + if (li->def_factors().size() == 0 && li->use_factors().size() == 0) + { + VERBOSE(planTensors) << "Operand #" << ind.value() << " will not be used. no more process." + << std::endl; + return; + } + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + auto factor = li->def_factors().getOnlyElement(); + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any op_seq (No use and def) + const auto info = obj.info(); + const auto backend_layout = factor.layout(); + // TODO Change tensor info to have permuted shape + tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + } + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. + VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + for (const auto &op_idx : op_seq.operations()) + { + auto &op = graph()->operations().at(op_idx); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // A variable tensor behaves like a constant, but OperandInfo and LowerInfo treat it as + // non-constant so that memory planning here can use less memory + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(lower_info.operand.at(ind)->def_factors().size() == 1 && + lower_info.operand.at(ind)->use_factors().size() == 1); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensor node + tensor_builder->notifyLastUse(ind); + } + } + } + } + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + optimizer->optimize(); + + for (const auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (const auto op_ind : op_seq) + { + bool op_assigned = [&]() { + for (auto &op_info : operation_list()) + if (op_info.index == op_ind) + return true; + return false; + }(); + if (!op_assigned) + continue; + + const auto &op = graph()->operations().at(op_ind); + for (const auto &index : (op.getInputs() + op.getOutputs()) | ir::Remove::UNDEFINED) + { + if (!tensor_builder->isRegistered(index) && !model_io.contains(index) && + find(operand_list().begin(), operand_list().end(), index) != operand_list().end()) + { + const auto &operand_lower_info = + lower_info.operand.at(index)->def_factors().getOnlyElement(); + + // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) + // op.getOutputs() of permute (CPU) returns tensor A, + // but tensor A belongs to the acl_cl backend. + // So, we must NOT register this tensor for the CPU backend. 
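+ // (A hypothetical walk-through of the check below: if lower_info reports tensor A's + // def factor as acl_cl while this context's backend() is acl_neon, the continue + // skips A here and leaves registration to the owning backend, so every operand + // ends up registered by exactly one backend's tensor_builder.)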
+ if (operand_lower_info.backend() != backend()) + continue; + + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = op_seq.getLayout(); + const auto backend_layout = operand_lower_info.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + } + } + + // TODO Get compiler options from the compiler, and use them rather than getting them from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + planTensors(order, op_seqs, lower_info); + } + else + { + // For executors that do not have a fixed linear execution order: + // as a workaround, use the static memory planner so that tensors are never deallocated + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; +} + +} // namespace acl_neon +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h new file mode 100644 index 000000000..dd764c091 --- /dev/null +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +namespace onert +{ +namespace backend +{ +namespace acl_neon +{ + +class Optimizer; + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen} + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + std::shared_ptr<Optimizer> optimizer; +}; + +} // namespace acl_neon +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index c7d71cdcf..9723ba012 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ #include "AclConstantInitializer.h" @@ -41,4 +41,4 @@ public: } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_ACL_NEON_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index ffaee3b3e..e712dfa81 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -48,7 +48,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<TensorBuilder> &tensor_builder, const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -61,7 +61,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq = std::make_unique<exec::FunctionSequence>(); _return_fn_seq->enableDynamicShapeInferer(false); - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -70,17 +70,17 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto ifm_index{node.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); - auto frontend_layout = _current_op_seq_layout; + auto frontend_layout = _current_layout; auto backend_layout = ifm_tensor->layout(); int axis_value = _ctx.at(axis_index).asScalar<int32_t>(); @@ -91,10 +91,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert(axis_value >= 0 && axis_value < ifm_rank); const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); + auto reduce_type = node.param().is_arg_max ? 
::arm_compute::ReductionOperation::ARG_IDX_MAX + : ::arm_compute::ReductionOperation::ARG_IDX_MIN; auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( - ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), reduce_type); _return_fn = asAclFunction(std::move(fn)); } @@ -106,6 +107,25 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto NNApiInputs = 2; + if (node.getInputs().size() != NNApiInputs) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + if (!_ctx.at(crops_index).isConstant()) + { + throw std::runtime_error("Non-constant crops NYI for acl_neon backend BatchToSpaceND"); + } + + auto crops = _ctx.at(crops_index).asVector<int32_t>(); + for (auto crop : crops) + { + if (crop != 0) + { + throw std::runtime_error("Non-zero crops NYI for acl_neon backend BatchToSpaceND"); + } + } + } + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index); auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index); @@ -178,8 +198,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -232,8 +252,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)}; const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. 
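// Illustration of these kernel-shape conventions (hypothetical values, assuming the usual TFLite channel-multiplier semantics): a depthwise kernel applying a 3x3 filter to 8 input channels with channel multiplier 2 has depth_out = 16 and shape [1, 3, 3, 16], whereas a regular Conv2D kernel for the same filter size would be laid out as [depth_out, 3, 3, depth_in].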
const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -297,7 +317,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) else { const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); @@ -495,7 +515,7 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -552,7 +572,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case. assert(backend_layout == ifm_tensor->layout()); assert(backend_layout == indices_tensor->layout()); - assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); + assert(ifm_rank < 4 || _current_layout == backend_layout); // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; @@ -686,7 +706,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : input_indexes) inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(output_index)->layout(); if (axis < 0) @@ -738,7 +758,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) { const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); const auto axis = acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); @@ -762,8 +782,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node) void KernelGenerator::visit(const ir::operation::Pool2D &node) { auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_reg, _current_op_seq_layout, - acl_common::convertPoolType(node.param().op_type)); + node, _ctx, _tensor_reg, _current_layout, acl_common::convertPoolType(node.param().op_type)); const auto ofm_index{node.getOutputs().at(0)}; auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index); @@ -836,7 +855,7 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) // Convert to ACL axes taking into account negative values and possible duplicates. 
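// A rough example of that conversion, assuming the frontend and backend layouts match: ARM Compute indexes dimensions from the innermost one, so for a rank-4 input a frontend axis of -1 is first normalized to 3 and then mapped to ACL axis rank - 3 - 1 = 0, while duplicate axes collapse into a single coordinate entry.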
const auto &axes = _ctx.at(axes_index); const auto input_rank = _ctx.at(input_index).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = input_tensor->layout(); const auto reduce_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); @@ -873,7 +892,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) || frontend_layout == backend_layout); @@ -1047,7 +1066,7 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &ofm_ind : output_indexes) output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); auto axis = _ctx.at(axis_index).asScalar<int32_t>(); if (axis < 0) @@ -1085,7 +1104,7 @@ void KernelGenerator::visit(const ir::operation::Slice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1150,7 +1169,7 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) auto outputData_tensor = _tensor_reg->getAclTensor(output_index); auto inputData_tensor = _tensor_reg->getAclTensor(input_index); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = inputData_tensor->layout(); // Set initializers for indices data such as order of inputData @@ -1244,9 +1263,9 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)}; const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_layout); const auto stride = node.param().stride; @@ -1285,7 +1304,7 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx); const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = ifm_tensor->layout(); const auto rank = _ctx.at(ifm_idx).shape().rank(); @@ -1340,7 +1359,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) 
outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = _tensor_reg->getAclTensor(input_index)->layout(); if (axis < 0) axis += input_rank; @@ -1413,7 +1432,7 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx); const size_t output_rank = _ctx.at(out_idx).shape().rank(); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto backend_layout = output_tensor->layout(); int32_t axis = node.param().axis == -1 ? output_rank - 1 : node.param().axis; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h index 4d269cde5..2a4b307b8 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.h +++ b/runtime/onert/backend/acl_neon/KernelGenerator.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_ACL_NEON_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include "ir/Operands.h" #include "TensorBuilder.h" @@ -31,7 +31,7 @@ namespace backend namespace acl_neon { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -39,17 +39,20 @@ public: const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; - void visit(const ir::operation::ArgMax &) override; + + void visit(const ir::operation::ArgMinMax &) override; void visit(const ir::operation::BatchToSpaceND &) override; void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::EmbeddingLookup &) override; + void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::FullyConnected &) override; void visit(const ir::operation::Gather &) override; void visit(const ir::operation::HashtableLookup &) override; @@ -57,36 +60,34 @@ public: void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const 
ir::operation::ResizeBilinear &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::Slice &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::TransposeConv &) override; void visit(const ir::operation::Unpack &) override; - void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::OneHot &) override; private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/Optimizer.h b/runtime/onert/backend/acl_neon/Optimizer.h index 5fe0d519c..b8fb343e9 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.h +++ b/runtime/onert/backend/acl_neon/Optimizer.h @@ -17,8 +17,7 @@ #ifndef __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ #define __ONERT_BACKEND_ACL_NEON_OPTIMIZER_H__ -#include <backend/IOptimizer.h> -#include <backend/BackendContext.h> +#include "BackendContext.h" #include "TensorBuilder.h" namespace onert @@ -28,12 +27,12 @@ namespace backend namespace acl_neon { -class Optimizer : public IOptimizer +class Optimizer { public: Optimizer(BackendContext *context); - void optimize() override; + void optimize(); private: BackendContext *_context; diff --git a/runtime/onert/backend/acl_neon/acl_neon.cc b/runtime/onert/backend/acl_neon/acl_neon.cc index f490d132d..6535fb291 100644 --- a/runtime/onert/backend/acl_neon/acl_neon.cc +++ b/runtime/onert/backend/acl_neon/acl_neon.cc @@ -14,20 +14,11 @@ * limitations under the License. 
*/ -#include <util/logging.h> - #include "Backend.h" extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'acl_neon' loaded\n"; - return new onert::backend::acl_neon::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'acl_neon' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::acl_neon::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h index fc8574b26..0b416a7e9 100644 --- a/runtime/onert/backend/cpu/Backend.h +++ b/runtime/onert/backend/cpu/Backend.h @@ -54,8 +54,6 @@ public: context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/backend/cpu/BackendContext.cc b/runtime/onert/backend/cpu/BackendContext.cc new file mode 100644 index 000000000..6b958c1b7 --- /dev/null +++ b/runtime/onert/backend/cpu/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? 
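+ // (The first user's layout stands in for this operand's frontend layout; if two + // users were lowered with different layouts, taking the first would be arbitrary, + // which is what the FIXME above points out.)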
+ for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from the compiler, and use them rather than getting them from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For executors that do not have a fixed linear execution order: + // as a workaround, use the static memory planner so that tensors are never deallocated + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h index e90b21054..0a4106d33 100644 --- a/runtime/onert/backend/cpu/BackendContext.h +++ b/runtime/onert/backend/cpu/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__ #include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,35 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, - std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, - std::shared_ptr<ITensorRegister> tensor_register = nullptr, - std::shared_ptr<IOptimizer> optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(new ExternalContext) + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) 
+ : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) { } + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr<ExternalContext> external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h index c016c83bc..d7858c0f6 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.h +++ b/runtime/onert/backend/cpu/ConstantInitializer.h @@ -14,13 +14,10 @@ * limitations under the License. */ -#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ -#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ -#include "backend/cpu_common/TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> +#include <backend/cpu_common/ConstantInitializer.h> namespace onert { @@ -29,35 +26,10 @@ namespace backend namespace cpu { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg); - -public: - void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override; - - // TODO: For now the only cpu backend supports constant tensor to use data from external - // If the other backend supports (to do this, - // ExternalTensor should be abstract such as IExternal, maybe), - // this can be an interface of IConstantInitializer - void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &); - -public: - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace cpu } // namespace backend } // namespace onert -#endif // __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h index 32e249f5a..f5d11f4f1 100644 --- a/runtime/onert/backend/cpu/ExternalContext.h +++ b/runtime/onert/backend/cpu/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__ -#include <backend/IExternalContext.h> #include 
<util/ConfigSource.h> #include <ruy/context.h> @@ -33,7 +32,7 @@ namespace backend namespace cpu { -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(new ruy::Context) diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 451815b65..25756eced 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -23,6 +23,7 @@ #include "ops/CompareLayer.h" #include "ops/ConcatLayer.h" #include "ops/ConvolutionLayer.h" +#include "ops/DepthToSpaceLayer.h" #include "ops/DepthwiseConvolutionLayer.h" #include "ops/EinsumLayer.h" #include "ops/ElementwiseActivationLayer.h" @@ -108,12 +109,16 @@ convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type { switch (type_ir) { + case ir::operation::ElementwiseActivation::Type::ELU: + return ops::ElementwiseActivationType::kElu; case ir::operation::ElementwiseActivation::Type::LOGISTIC: return ops::ElementwiseActivationType::kLogistic; case ir::operation::ElementwiseActivation::Type::RELU: return ops::ElementwiseActivationType::kReLU; case ir::operation::ElementwiseActivation::Type::TANH: return ops::ElementwiseActivationType::kTanh; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ops::ElementwiseActivationType::kLeakyReLU; default: throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); } @@ -124,6 +129,8 @@ convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinary { switch (type_ir) { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + return ops::ElementwiseBinaryType::kLogicalAnd; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: return ops::ElementwiseBinaryType::kLogicalOr; case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: @@ -167,6 +174,10 @@ ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::Elementwise return ops::ElementwiseUnaryType::kRSqrt; case ir::operation::ElementwiseUnary::Type::SIN: return ops::ElementwiseUnaryType::kSin; + case ir::operation::ElementwiseUnary::Type::SQRT: + return ops::ElementwiseUnaryType::kSqrt; + case ir::operation::ElementwiseUnary::Type::SQUARE: + return ops::ElementwiseUnaryType::kSquare; case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE: return ops::ElementwiseUnaryType::kZerosLike; default: @@ -217,7 +228,7 @@ KernelGenerator::KernelGenerator( const std::shared_ptr<ExternalContext> &external_context) : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context) + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) { // DO NOTHING } @@ -260,7 +271,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } - _current_op_seq_layout = op_seq.getLayout(); + _current_layout = op_seq.getLayout(); for (const auto &operation_idx : op_seq.operations()) { const auto &node = _operations_ctx.at(operation_idx); @@ -314,8 +325,8 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) _return_fn = std::move(fn); return; } - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -342,8 +353,8 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)}; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); // Kernel format is [1, kernel_height, kernel_width, depth_out]. const auto &ker_shape = _ctx.at(ker_index).shape(); const auto ker_height = ker_shape.dim(1); @@ -364,7 +375,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, multiplier, dilation_width, - dilation_height, activation, ofm_tensor); + dilation_height, activation, ofm_tensor, _external_context); _return_fn = std::move(fn); } @@ -374,7 +385,7 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); auto output_tensor = _tensor_reg->getPortableTensor(ofm_index); @@ -418,16 +429,15 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) void KernelGenerator::visit(const ir::operation::Fill &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; + // SHAPE input is used for shape inference const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); - auto input_tensor = _tensor_reg->getPortableTensor(input_index); auto value_tensor = _tensor_reg->getPortableTensor(value_index); auto fn = std::make_unique<ops::FillLayer>(); - fn->configure(input_tensor, value_tensor, output_tensor); + fn->configure(value_tensor, output_tensor); _return_fn = std::move(fn); } @@ -576,7 +586,7 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); const auto &input_shape = _ctx.at(input_index).shape(); UNUSED_RELEASE(input_shape); - assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout); + assert(input_shape.rank() < 4 || _current_layout == backend_layout); const auto axis_raw = node.param().axis; const auto axis_value = (axis_raw < 0 ? 
(input_shape.rank() + axis_raw) : axis_raw); @@ -640,7 +650,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) for (auto &idx : opSeq) { const auto &operand = _ctx.at(idx); - // TODO make sure using `_current_op_seq_layout` is correct for custom operations + // TODO make sure using `_current_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); @@ -713,15 +723,14 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + // AXIS input is used for output shape inference auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); - auto axis_tensor = _tensor_reg->getPortableTensor(axis_index); auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(input_tensor, output_tensor); _return_fn = std::move(fn); } @@ -731,7 +740,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) const auto ofm_index{node.getOutputs().at(0)}; const auto rank = _ctx.at(ofm_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(-rank <= axis && axis < rank); @@ -753,7 +762,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) const auto input_index{node.getInputs().at(0)}; const auto rank = _ctx.at(input_index).shape().rank(); - const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); + const auto axis = ops::getAxis(rank, node.param().axis, _current_layout); assert(rank == 0 || (-rank <= axis && axis < rank)); @@ -1004,11 +1013,11 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ArgMax &node) +void KernelGenerator::visit(const ir::operation::ArgMinMax &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ArgMax::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ArgMax::AXIS)}; + const auto input_index{node.getInputs().at(ir::operation::ArgMinMax::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ArgMinMax::AXIS)}; auto output_tensor = _tensor_reg->getPortableTensor(output_index); auto input_tensor = _tensor_reg->getPortableTensor(input_index); @@ -1016,7 +1025,7 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto fn = std::make_unique<ops::ArgMinMaxLayer>(); - fn->configure(input_tensor, output_tensor, axis_tensor, /* is_arg_max */ true); + fn->configure(input_tensor, output_tensor, axis_tensor, node.param().is_arg_max); _return_fn = std::move(fn); } @@ -1029,8 +1038,8 @@ void KernelGenerator::visit(const ir::operation::Pool2D &node) const auto kh = node.param().kh; const auto kw = node.param().kw; const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto ifm_shape = 
_ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); const auto activation = node.param().activation; @@ -1255,6 +1264,21 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } +void KernelGenerator::visit(const ir::operation::DepthToSpace &node) +{ + const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + auto block_size = node.param().block_size; + + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + + auto fn = std::make_unique<ops::DepthToSpaceLayer>(); + + fn->configure(input_tensor, block_size, output_tensor); + _return_fn = std::move(fn); +} + void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 5df77607f..3a4cfbffa 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -23,7 +23,7 @@ #include "Tensor.h" #include <backend/CustomKernelBuilder.h> -#include <backend/IKernelGenerator.h> +#include <backend/cpu_common/KernelGeneratorBase.h> #include <ir/Operands.h> #include <ir/Operations.h> @@ -34,7 +34,7 @@ namespace backend namespace cpu { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, @@ -43,59 +43,59 @@ public: const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); - using IKernelGenerator::visit; + void visit(const ir::OpSequence &) override; void visit(const ir::operation::AddN &) override; - void visit(const ir::OpSequence &) override; + void visit(const ir::operation::ArgMinMax &) override; + void visit(const ir::operation::BatchMatMul &) override; + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; + void visit(const ir::operation::BroadcastTo &) override; + void visit(const ir::operation::Comparison &) override; + void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::Custom &node) override; + void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::Concat &) override; - void visit(const ir::operation::Fill &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Reshape &) override; - void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Softmax &) override; - void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Custom &node) override; void visit(const ir::operation::ElementwiseActivation &) override; void visit(const ir::operation::ElementwiseBinary &) 
override; void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; + void visit(const ir::operation::Fill &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::FusedBatchNorm &) override; + void visit(const ir::operation::Gather &) override; + void visit(const ir::operation::L2Normalization &) override; + void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Pack &) override; - void visit(const ir::operation::Unpack &) override; + void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::Select &) override; - void visit(const ir::operation::Slice &) override; - void visit(const ir::operation::StridedSlice &) override; - void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Shape &) override; - void visit(const ir::operation::ResizeBilinear &node) override; - void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::ArgMax &) override; + void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pad &) override; void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::SquaredDifference &) override; - void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; void visit(const ir::operation::Rank &) override; - void visit(const ir::operation::MatrixBandPart &) override; - void visit(const ir::operation::BatchMatMul &) override; - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::BroadcastTo &) override; - void visit(const ir::operation::FusedBatchNorm &) override; - void visit(const ir::operation::LogSoftmax &) override; + void visit(const ir::operation::Reduce &) override; + void visit(const ir::operation::Reshape &) override; + void visit(const ir::operation::ResizeBilinear &node) override; + void visit(const ir::operation::Reverse &) override; + void visit(const ir::operation::Select &) override; + void visit(const ir::operation::Shape &) override; + void visit(const ir::operation::Slice &) override; + void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::Split &) override; void visit(const ir::operation::SplitV &) override; + void visit(const ir::operation::SquaredDifference &) override; + void visit(const ir::operation::Squeeze &) override; + void visit(const ir::operation::StatelessRandomUniform &) override; + void visit(const ir::operation::StridedSlice &) override; + void visit(const ir::operation::Tile &) override; + void visit(const ir::operation::Transpose &) override; + void visit(const ir::operation::Unpack &) override; private: const ir::Operands &_ctx; @@ -103,7 +103,7 @@ private: std::shared_ptr<TensorBuilder> _tensor_builder; std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; 
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/backend/cpu/StaticTensorManager.cc b/runtime/onert/backend/cpu/StaticTensorManager.cc deleted file mode 100644 index 3edac897c..000000000 --- a/runtime/onert/backend/cpu/StaticTensorManager.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "StaticTensorManager.h" -#include "Tensor.h" - -#include <util/logging.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ - -StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager) - : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg}, - _dynamic_tensor_manager{dynamic_tensor_manager} -{ - // DO NOTHING -} - -void StaticTensorManager::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); - - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (!_as_constants[ind] && !tensor->is_dynamic()) - { - auto *buffer = _nonconst_mgr->getBuffer(ind); - tensor->setBuffer(buffer); - - VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; - } - } -} - -void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } - -void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, - const ir::OperandInfo &tensor_info, ir::Layout backend_layout, - bool as_const) -{ - assert(!_tensors->getITensor(ind)); - if (as_const) - { - auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - else - { - auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, - _dynamic_tensor_manager->dynamic_mem_mgr().get()); - _tensors->setNativeTensor(ind, std::move(tensor)); - } - _as_constants[ind] = as_const; -} - -void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->claimPlan(ind, size); -} - -void StaticTensorManager::releasePlan(const ir::OperandIndex &ind) -{ - assert(_tensors->getITensor(ind)); - - // This method is called only when a tensor has proper shape - assert(!_tensors->getITensor(ind)->is_dynamic()); - - if (!_as_constants[ind]) - _nonconst_mgr->releasePlan(ind); -} - -void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) -{ - for (const auto &it : _tensors->native_tensors()) - fn(it.first); -} - -} // namespace cpu -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/cpu/StaticTensorManager.h b/runtime/onert/backend/cpu/StaticTensorManager.h index 2af61e4e7..d07f0c814 100644 --- a/runtime/onert/backend/cpu/StaticTensorManager.h +++ b/runtime/onert/backend/cpu/StaticTensorManager.h @@ -17,13 +17,7 @@ #ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__ -#include "backend/IStaticTensorManager.h" -#include "backend/cpu_common/DynamicTensorManager.h" -#include "backend/cpu_common/MemoryManager.h" -#include "backend/cpu_common/TensorRegistry.h" -#include "backend/ITensorManager.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandInfo.h" +#include "backend/cpu_common/StaticTensorManager.h" namespace onert { @@ -32,30 +26,7 @@ namespace backend namespace cpu { -class StaticTensorManager : public backend::IStaticTensorManager -{ -public: - StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®, - cpu_common::DynamicTensorManager *dynamic_tensor_manager); - virtual ~StaticTensorManager() = default; - - void allocateNonconsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, - ir::Layout backend_layout, bool as_const); - - void claimPlan(const ir::OperandIndex &ind, uint32_t size); - void releasePlan(const ir::OperandIndex &ind); - - void iterate(const std::function<void(const ir::OperandIndex &)> &fn); - -private: - std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr; - const std::shared_ptr<cpu_common::TensorRegistry> _tensors; - ir::OperandIndexMap<bool> _as_constants; - cpu_common::DynamicTensorManager *_dynamic_tensor_manager; -}; +using StaticTensorManager = cpu_common::StaticTensorManager; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/Tensor.h b/runtime/onert/backend/cpu/Tensor.h index 2ad2ad0fb..d663c3f50 100644 --- a/runtime/onert/backend/cpu/Tensor.h +++ b/runtime/onert/backend/cpu/Tensor.h @@ -28,92 +28,7 @@ namespace cpu { using Tensor = cpu_common::Tensor; - -/** - * @brief Class that uses data from external memory that is not managed by a backend - * instead of allocating and copying the data. ExternalTensor's data pointer points to - * an address of memory such as where memory is already allocated, or mmapped area. - * This is meaning that ExternalTensor can take all of types' ir::Data. - * To support this, assume below things no padding, always NHWC layout, - * constant tensor and not dynamic. - */ -class ExternalTensor : public Tensor -{ -public: - ExternalTensor() = delete; - virtual ~ExternalTensor(); - -public: - ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) - : Tensor(info, layout, nullptr) - { - assert(_layout == ir::Layout::NHWC); - assert(_info.isConstant()); - assert(_info.isDynamic() == false); - } - -public: - /** - * @brief set Data to be shared from external so that this ExternalTensor will not be - * allocated on CPU backend - * @param[in] data data of Operand to be set - */ - void setData(const std::shared_ptr<ir::Data> data) - { - assert(data != nullptr); - _data = data; - // Note. Some op such as cker::Conv could take buffer as nullptr. 
- // That's why _buffer also would be used - _buffer = const_cast<uint8_t *>(_data->base()); - } - -public: - uint8_t *buffer() const override { return _buffer; } - - bool is_constant() const override { return true; } - bool is_dynamic() const override { return false; } - void set_dynamic() override - { - throw std::runtime_error("This tensor does not support changing dynamic"); - } - - void setShape(const ir::Shape &) override - { - throw std::runtime_error("This tensor does not support changing shape"); - } - - void increase_ref() override { ++_num_references; } - - void decrease_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - --_num_references; - if (_num_references == 0) - { - _data.reset(); - _buffer = nullptr; - } - } - - /** - * @brief Reset reference count to zero and release data - */ - void reset_ref() override - { - assert(_data != nullptr); - assert(_num_references > 0); - _num_references = 0; - - _data.reset(); - _buffer = nullptr; - } - - int32_t num_references() override { return _num_references; } - -private: - std::shared_ptr<const ir::Data> _data; -}; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 448abc229..9d8a5deb5 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -20,7 +20,6 @@ #include <backend/cpu_common/DynamicTensorManager.h> #include <backend/cpu_common/TensorRegistry.h> -#include <backend/ITensorBuilder.h> #include <ir/OperandIndexMap.h> #include "StaticTensorManager.h" @@ -35,7 +34,7 @@ namespace backend namespace cpu { -class TensorBuilder : public ITensorBuilder +class TensorBuilder { public: TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); @@ -47,18 +46,18 @@ public: * @param[in] layout Operand data layout */ void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) override; + ir::Layout backend_layout); - void notifyFirstUse(const ir::OperandIndex &) override; - void notifyLastUse(const ir::OperandIndex &) override; + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); - bool isRegistered(const ir::OperandIndex &) const override; + bool isRegistered(const ir::OperandIndex &) const; - void prepare(void) override; - void allocate() override; - void postFunctionPrepare() override { /* DO NOTHING */} + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} - IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; diff --git a/runtime/onert/backend/cpu/cpu.cc b/runtime/onert/backend/cpu/cpu.cc index 5385bb2a3..55538e2a6 100644 --- a/runtime/onert/backend/cpu/cpu.cc +++ b/runtime/onert/backend/cpu/cpu.cc @@ -16,18 +16,9 @@ #include "Backend.h" -#include <util/logging.h> - extern "C" { -onert::backend::Backend *onert_backend_create() -{ - VERBOSE(onert_backend_create) << "'cpu' loaded\n"; - return new onert::backend::cpu::Backend; -} -void onert_backend_destroy(onert::backend::Backend *backend) -{ - VERBOSE(onert_backend_create) << "'cpu' unloaded\n"; - delete backend; -} +onert::backend::Backend *onert_backend_create() { return new onert::backend::cpu::Backend; } + +void 
onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } } diff --git a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc index 2fd284c91..d5ffdef0b 100644 --- a/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/ArgMinMaxLayer.cc @@ -79,6 +79,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t); break; @@ -97,6 +100,9 @@ void ArgMinMaxLayer::run() case ir::DataType::UINT8: TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); break; + case ir::DataType::QUANT_INT8_ASYMM: + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); + break; case ir::DataType::INT32: TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t); break; diff --git a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc index 7ef023788..ba9655924 100644 --- a/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc +++ b/runtime/onert/backend/cpu/ops/BatchMatMulLayer.cc @@ -67,7 +67,7 @@ void BatchMatMulLayer::configure(const IPortableTensor *lhs, const IPortableTens void BatchMatMulLayer::run() { - if (_lhs->data_type() == OperandType::FLOAT32) + if ((_lhs->data_type() == OperandType::FLOAT32) && (_rhs->data_type() == OperandType::FLOAT32)) { batchMatMulFloat32(); } diff --git a/runtime/onert/backend/cpu/ops/ConcatLayer.cc b/runtime/onert/backend/cpu/ops/ConcatLayer.cc index d26ed7378..edfdfc1a6 100644 --- a/runtime/onert/backend/cpu/ops/ConcatLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConcatLayer.cc @@ -117,24 +117,26 @@ void ConcatLayer::configure(const std::vector<const IPortableTensor *> &inputs, void ConcatLayer::run() { - if (_output->data_type() == OperandType::FLOAT32) + switch (_output->data_type()) { - concatenationGeneral<float>(); + case OperandType::FLOAT32: + concatenationGeneral<float>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + concatenationQuant8(); + break; + case OperandType::QUANT_INT8_ASYMM: + concatenationGeneral<int8_t>(); + break; + case OperandType::INT32: + concatenationGeneral<int32_t>(); + break; + case OperandType::INT64: + concatenationGeneral<int64_t>(); + break; + default: + throw std::runtime_error("Concat: unsupported data type"); } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - concatenationQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - concatenationGeneral<int32_t>(); - } - else if (_output->data_type() == OperandType::INT64) - { - concatenationGeneral<int64_t>(); - } - else - throw std::runtime_error("Concat: unsupported data type"); } } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 799e9e2d0..c964e38f9 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -203,8 +203,6 @@ void ConvolutionLayer::prepare() _prepare = true; } -#undef ANDROID_NN_CONV_PARAMETERS - } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc new file mode 100644 index 000000000..d265d0ac2 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.cc @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DepthToSpaceLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/DepthToSpace.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +DepthToSpaceLayer::DepthToSpaceLayer() : _input(nullptr), _block_size(0), _output(nullptr) +{ + // DO NOTHING +} + +template <typename T> void DepthToSpaceLayer::depthToSpace() +{ + nnfw::cker::DepthToSpace(getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()), + _block_size); +} + +void DepthToSpaceLayer::configure(const IPortableTensor *input, const int32_t block_size, + IPortableTensor *output) +{ + _input = input; + _block_size = block_size; + _output = output; +} + +void DepthToSpaceLayer::run() +{ + switch (_input->data_type()) + { + case OperandType::FLOAT32: + depthToSpace<float>(); + break; + case OperandType::INT32: + depthToSpace<int32_t>(); + break; + case OperandType::INT64: + depthToSpace<int64_t>(); + break; + case OperandType::QUANT_UINT8_ASYMM: + depthToSpace<uint8_t>(); + break; + case OperandType::QUANT_INT8_ASYMM: + depthToSpace<int8_t>(); + break; + default: + throw std::runtime_error{"DepthToSpace: unsupported data type"}; + } +} + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h new file mode 100644 index 000000000..32e0171ce --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DepthToSpaceLayer.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ + +#include <backend/IPortableTensor.h> + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ +class DepthToSpaceLayer : public ::onert::exec::IFunction +{ +public: + DepthToSpaceLayer(); + + void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output); + + void run() override; + +private: + template <typename T> void depthToSpace(); + + const IPortableTensor *_input; + int32_t _block_size; + IPortableTensor *_output; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_DEPTH_TO_SPACE_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc index f1dc1103a..85553d14d 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc @@ -43,11 +43,12 @@ void DepthwiseConvolutionLayer::convFloat32() op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<float, float>( op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::convQuant8() @@ -79,11 +80,12 @@ void DepthwiseConvolutionLayer::convQuant8() op_params.quantized_activation_min = output_activation_min; op_params.quantized_activation_max = output_activation_max; - nnfw::cker::DepthwiseConv( + nnfw::cker::DepthwiseConv<uint8_t, int32_t>( op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_kernel), reinterpret_cast<const uint8_t *>(_kernel->buffer()), getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()), + _external_context->ruy_context()); } void DepthwiseConvolutionLayer::configure( @@ -91,7 +93,8 @@ void DepthwiseConvolutionLayer::configure( const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, - const ir::Activation activation, IPortableTensor *output) + const ir::Activation activation, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) { _input = input; _kernel = kernel; @@ -107,6 +110,7 @@ void DepthwiseConvolutionLayer::configure( _dilationHeight = dilationHeight; _activation = activation; _output = output; + _external_context = external_context; } void DepthwiseConvolutionLayer::run() diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h index fb032ecbf..fe1fcc182 100644 --- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h +++ 
b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h @@ -19,6 +19,7 @@ #include <backend/IPortableTensor.h> #include "OperationUtils.h" +#include "../ExternalContext.h" #include <exec/IFunction.h> @@ -47,7 +48,7 @@ public: const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, const uint32_t multiplier, const uint32_t dilationWidth, const uint32_t dilationHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); void run() override; @@ -71,6 +72,8 @@ private: uint32_t _dilationHeight{1}; ir::Activation _activation{ir::Activation::NONE}; + + std::shared_ptr<ExternalContext> _external_context; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc index c1d63172b..3e1da5ec0 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -18,6 +18,8 @@ #include "OperationUtils.h" +#include <cker/operation/ELU.h> +#include <cker/operation/LeakyReLU.h> #include <cker/operation/Logistic.h> #include <cker/operation/ReLU.h> #include <cker/operation/ReLU6.h> @@ -91,6 +93,19 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab switch (op_type) { + case ElementwiseActivationType::kElu: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ELU(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Elu): unsupported data type"}; + } + break; case ElementwiseActivationType::kLogistic: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { @@ -160,6 +175,21 @@ void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortab throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; } break; + case ElementwiseActivationType::kLeakyReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [alpha](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::LeakyReLU(nnfw::cker::LeakyReluParams{alpha}, getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), + reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(LeakyReLU): unsupported data type"}; + } + break; default: throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); } diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 3ef580041..948ab3b57 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -32,9 +32,11 @@ namespace ops enum class ElementwiseActivationType { + kElu, kLogistic, kReLU, - kTanh + kTanh, + kLeakyReLU }; class ElementwiseActivationLayer : public ::onert::exec::IFunction diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc index ea3c1e7cd..1e17a0828 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -18,6 +18,7 @@ #include 
"OperationUtils.h" +#include <cker/operation/LogicalAnd.h> #include <cker/operation/LogicalOr.h> #include <cker/operation/MaxMin.h> @@ -33,6 +34,25 @@ namespace ops namespace { template <typename T> +void logicalAndGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalAndBroadcast<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + } + else + { + nnfw::cker::LogicalAndElementwise<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer())); + } +} + +template <typename T> void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) { @@ -88,6 +108,16 @@ void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortab switch (op_type) { + case ElementwiseBinaryType::kLogicalAnd: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalAndGeneric<bool>; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; case ElementwiseBinaryType::kLogicalOr: if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc index 066455e72..15d7f3049 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -195,6 +195,18 @@ void sinFloat32(const IPortableTensor *input, IPortableTensor *output) getTensorShape(output), reinterpret_cast<float *>(output->buffer())); } +void sqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void squareFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Square(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) { if (!HaveSameShapes(input, output)) @@ -363,6 +375,26 @@ void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTen throw std::runtime_error{"Sin: Unsupported data type"}; } break; + case ElementwiseUnaryType::kSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sqrtFloat32; + } + else + { + throw std::runtime_error{"Sqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSquare: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = squareFloat32; + } + else + { + throw std::runtime_error{"Square: Unsupported data type"}; + } + break; case ElementwiseUnaryType::kZerosLike: if (input->data_type() == OperandType::FLOAT32) { diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index c1765b5b7..54a6fc02a 100644 --- a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -46,6 +46,8 @@ enum class ElementwiseUnaryType kRound, kRSqrt, kSin, + 
kSqrt, + kSquare, kZerosLike }; diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc index b545e6743..5ea0ea893 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc @@ -25,22 +25,19 @@ namespace cpu namespace ops { -ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _axis(nullptr), _output(nullptr) +ExpandDimsLayer::ExpandDimsLayer() : _input(nullptr), _output(nullptr) { // DO NOTHING } -void ExpandDimsLayer::configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output) +void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; - _axis = axis; _output = output; } void ExpandDimsLayer::run() { - // TODO use _axis to calculate shape of output when _axis is not constant size_t count = _input->total_size(); memcpy(_output->buffer(), _input->buffer(), count); } diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h index b5d4938b5..1b7ead0c3 100644 --- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h +++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.h @@ -36,14 +36,12 @@ public: ExpandDimsLayer(); public: - void configure(const IPortableTensor *input, const IPortableTensor *axis, - IPortableTensor *output); + void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; private: const IPortableTensor *_input; - const IPortableTensor *_axis; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.cc b/runtime/onert/backend/cpu/ops/FillLayer.cc index df3f8b7cd..5b7c17907 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.cc +++ b/runtime/onert/backend/cpu/ops/FillLayer.cc @@ -29,15 +29,13 @@ namespace cpu namespace ops { -FillLayer::FillLayer() : _input(nullptr), _value(nullptr), _output(nullptr) +FillLayer::FillLayer() : _value(nullptr), _output(nullptr) { // DO NOTHING } -void FillLayer::configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output) +void FillLayer::configure(const IPortableTensor *value, IPortableTensor *output) { - _input = input; _value = value; _output = output; } @@ -47,28 +45,24 @@ void FillLayer::run() switch (_output->data_type()) { case OperandType::FLOAT32: - nnfw::cker::Fill<float *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<float *>(_value->buffer()), + nnfw::cker::Fill<float *>(reinterpret_cast<float *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); break; case OperandType::INT32: - nnfw::cker::Fill<int32_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int32_t *>(_value->buffer()), + nnfw::cker::Fill<int32_t *>(reinterpret_cast<int32_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); break; case OperandType::INT64: - nnfw::cker::Fill<int64_t *>(getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<int64_t *>(_value->buffer()), + nnfw::cker::Fill<int64_t *>(reinterpret_cast<int64_t *>(_value->buffer()), getTensorShape(_output), reinterpret_cast<int64_t *>(_output->buffer())); break; case OperandType::UINT32: - nnfw::cker::Fill<uint32_t *>( - getTensorShape(_input), reinterpret_cast<int *>(_input->buffer()), - reinterpret_cast<uint32_t *>(_value->buffer()), getTensorShape(_output), - reinterpret_cast<uint32_t 
*>(_output->buffer())); + nnfw::cker::Fill<uint32_t *>(reinterpret_cast<uint32_t *>(_value->buffer()), + getTensorShape(_output), + reinterpret_cast<uint32_t *>(_output->buffer())); break; default: throw std::runtime_error{"Fill: unsupported data type"}; diff --git a/runtime/onert/backend/cpu/ops/FillLayer.h b/runtime/onert/backend/cpu/ops/FillLayer.h index 1f17d6b68..ce843654a 100644 --- a/runtime/onert/backend/cpu/ops/FillLayer.h +++ b/runtime/onert/backend/cpu/ops/FillLayer.h @@ -35,13 +35,11 @@ class FillLayer : public ::onert::exec::IFunction public: FillLayer(); - void configure(const IPortableTensor *input, const IPortableTensor *value, - IPortableTensor *output); + void configure(const IPortableTensor *value, IPortableTensor *output); void run() override; private: - const IPortableTensor *_input; const IPortableTensor *_value; IPortableTensor *_output; }; diff --git a/runtime/onert/backend/cpu/ops/MeanLayer.cc b/runtime/onert/backend/cpu/ops/MeanLayer.cc index 4921ac748..f130692ee 100644 --- a/runtime/onert/backend/cpu/ops/MeanLayer.cc +++ b/runtime/onert/backend/cpu/ops/MeanLayer.cc @@ -36,9 +36,24 @@ MeanLayer::MeanLayer() : _input(nullptr), _axes(nullptr), _output(nullptr), _kee void MeanLayer::MeanFloat32() { - nnfw::cker::Mean(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), - getReducerAxes(_axes)); + const auto inputShape = getTensorShape(_input); + const auto axisVec = getReducerAxes(_axes); + bool axis_is_1_and_2 = + _keep_dims && inputShape.DimensionsCount() == 4 && axisVec.size() == 2 && + ((axisVec[0] == 1 && axisVec[1] == 2) || (axisVec[0] == 2 && axisVec[1] == 1)); + + if (axis_is_1_and_2) + { + nnfw::cker::MeanAxis1And2(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), + reinterpret_cast<float *>(_output->buffer())); + } + else + { + nnfw::cker::Mean(inputShape, reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + axisVec); + } } void MeanLayer::MeanQuant8() @@ -57,6 +72,10 @@ void MeanLayer::configure(const IPortableTensor *input, const IPortableTensor *a _axes = axes; _output = output; _keep_dims = keep_dims; + + if (_input->data_type() != OperandType::FLOAT32 && + _input->data_type() != OperandType::QUANT_UINT8_ASYMM) + throw std::runtime_error{"Mean: unsupported data type"}; } void MeanLayer::run() diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h new file mode 100644 index 000000000..bc8a024d8 --- /dev/null +++ b/runtime/onert/backend/ruy/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_H__ +#define __ONERT_BACKEND_RUY_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_H__ diff --git a/runtime/onert/backend/ruy/BackendContext.cc b/runtime/onert/backend/ruy/BackendContext.cc new file mode 100644 index 000000000..ef686f480 --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/cpu_common/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +void BackendContext::initConsts() +{ + for (auto &op : operation_list()) + { + constant_initializer->setLayout(op.layout); + graph()->operations().at(op.index).accept(*constant_initializer); + } + + for (auto ind : operand_list()) + { + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + } + + constant_initializer->run(); +} + +ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) +{ + auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED | + ir::Remove::DUPLICATED; + for (auto index : operand_list()) + { + if (model_io.contains(index)) + continue; + const auto &obj = graph()->operands().at(index); + const auto frontend_layout = [&]() { + if (obj.getUses().size() == 0) + return ir::Layout::UNKNOWN; + auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses? + for (auto &operation_info : operation_list()) + { + if (operation_info.index == use_op_ind) + return operation_info.layout; + } + return ir::Layout::UNKNOWN; + }(); + const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement(); + if (permute_factor.backend() != backend()) + continue; + const auto backend_layout = permute_factor.layout(); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + tensor_builder->registerTensorInfo(index, backend_info, backend_layout); + } + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if (util::getConfigString(util::config::EXECUTOR) == "Linear") + { + cpu_common::planTensors(*this, order, op_seqs, lower_info); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + for (auto ind : operand_list()) + { + if (tensor_builder->isRegistered(ind)) + tensor_builder->notifyFirstUse(ind); + } + } + + tensor_builder->prepare(); + + return tensor_registry.get(); +} + +FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) +{ + FunctionMap ret; + + for (auto op_seq_ind : order) + { + const auto &op_seq = op_seqs.at(op_seq_ind); + bool assigned = [&]() { + for (auto op_info : operation_list()) + if (op_seq.exist(op_info.index)) + return true; + return false; + }(); + if (!assigned) + continue; + auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind)); + ret.emplace_back(op_seq_ind, std::move(fn_seq)); + } + + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + for (auto ind : operand_list()) + { + // TODO Remove const_cast + auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind); + obj.releaseData(); + } + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace ruy +} 
// namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/BackendContext.h b/runtime/onert/backend/ruy/BackendContext.h new file mode 100644 index 000000000..b965c9a9d --- /dev/null +++ b/runtime/onert/backend/ruy/BackendContext.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" +#include "ExternalContext.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(new ExternalContext) + { + } + + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + + std::shared_ptr<ExternalContext> external_context() { return _external_context; } + +private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, + // the thread pool is also created in duplicate + // TODO Create one ruy context for session + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/CMakeLists.txt b/runtime/onert/backend/ruy/CMakeLists.txt new file mode 100644 index 000000000..206acbfbf --- /dev/null +++ b/runtime/onert/backend/ruy/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LIB_ONERT_BACKEND_RUY onert_backend_ruy) + +nnfw_find_package(Ruy REQUIRED) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_RUY} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_lib_ruy) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} 
PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE nnfw_coverage) +target_link_libraries(${LIB_ONERT_BACKEND_RUY} PRIVATE ruy) + +set_target_properties(${LIB_ONERT_BACKEND_RUY} PROPERTIES OUTPUT_NAME backend_ruy) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_RUY} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_RUY}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_RUY} DESTINATION lib) diff --git a/runtime/onert/backend/cpu/Tensor.cc b/runtime/onert/backend/ruy/Config.cc index dac8f898b..179caa9a6 100644 --- a/runtime/onert/backend/cpu/Tensor.cc +++ b/runtime/onert/backend/ruy/Config.cc @@ -14,18 +14,18 @@ * limitations under the License. */ -#include "Tensor.h" +#include "Config.h" namespace onert { namespace backend { -namespace cpu +namespace ruy { -// `dynamic_cast` not working across library boundaries on NDK -// With this as a key function, `dynamic_cast` works across dl -ExternalTensor::~ExternalTensor() {} +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/ruy/Config.h b/runtime/onert/backend/ruy/Config.h new file mode 100644 index 000000000..9160dd5b1 --- /dev/null +++ b/runtime/onert/backend/ruy/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONFIG_H__ +#define __ONERT_BACKEND_RUY_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "ruy"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return true; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONFIG_H__ diff --git a/runtime/onert/backend/ruy/ConstantInitializer.h b/runtime/onert/backend/ruy/ConstantInitializer.h new file mode 100644 index 000000000..24b4d924d --- /dev/null +++ b/runtime/onert/backend/ruy/ConstantInitializer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ +#define __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ + +#include <backend/cpu_common/ConstantInitializer.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using ConstantInitializer = cpu_common::ConstantInitializer; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_CONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h new file mode 100644 index 000000000..f51faccb8 --- /dev/null +++ b/runtime/onert/backend/ruy/ExternalContext.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ +#define __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ + +#include <util/ConfigSource.h> +#include <ruy/context.h> + +namespace +{ +const int kDefaultNumThreadpoolThreads = 4; +} + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class ExternalContext +{ +public: + ExternalContext() : _ruy_context(new ::ruy::Context) + { + setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS)); + } + + void setMaxNumThreads(int max_num_threads) + { + const int target_num_threads = + max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; + _ruy_context->set_max_num_threads(target_num_threads); + } + + ::ruy::Context *ruy_context() const { return _ruy_context.get(); } + +private: + const std::unique_ptr<::ruy::Context> _ruy_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_EXTERNAL_CONTEXT_H__ diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc new file mode 100644 index 000000000..cd2825068 --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "KernelGenerator.h" + +#include "ops/ConvolutionLayer.h" +#include "ops/FullyConnectedLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context) + : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder), + _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder), + _current_layout(ir::Layout::UNKNOWN), _external_context(external_context) +{ + // DO NOTHING +} + +void KernelGenerator::visit(const ir::OpSequence &op_seq) +{ + assert(!_return_fn_seq); + assert(_tensor_builder->dynamicTensorManager()); + assert(_tensor_reg); + + auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg); + + _return_fn_seq = std::make_unique<exec::FunctionSequence>(); + + // Prepare to handle dynamic tensors later + auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>(); + { + dyn_ctx->op_seq = &op_seq; + dyn_ctx->operations = &_operations_ctx; + dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); + dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager(); + + _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); + } + + _current_layout = op_seq.getLayout(); + for (const auto &operation_idx : op_seq.operations()) + { + const auto &node = _operations_ctx.at(operation_idx); + node.accept(*this); + _return_fn_seq->append(releaseFunction()); + + for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) + { + auto portable_tensor = _tensor_reg->getPortableTensor(ind); + if (portable_tensor) + { + assert(portable_tensor->layout() == ir::Layout::NHWC); + } + + auto tensor = _tensor_reg->getNativeTensor(ind); + if (tensor) + { + tensor->increase_ref(); + } + } + } +} + +void KernelGenerator::visit(const ir::operation::Conv2D &node) +{ + using ir::operation::Conv2D; + + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)}; + const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)}; + const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)}; + + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index); + + const auto stride = node.param().stride; + const auto activation = node.param().activation; + const auto param_padding = node.param().padding; + const auto dilation = node.param().dilation; + auto fn = std::make_unique<ops::ConvolutionLayer>(); + + if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic()) + { + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left, + param_padding.param.right, param_padding.param.top, param_padding.param.bottom, + stride.horizontal, 
stride.vertical, dilation.width_factor, dilation.height_factor, + activation, ofm_tensor, _external_context); + + _return_fn = std::move(fn); + return; + } + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout); + // Kernel format is [depth_out, kernel_height, kernel_width, depth_in]. + const auto &ker_shape = _ctx.at(ker_index).shape(); + const auto ker_height = ker_shape.dim(1); + const auto ker_width = ker_shape.dim(2); + + const auto padding = + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + dilation.width_factor, dilation.height_factor); + + fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left, + padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical, + dilation.width_factor, dilation.height_factor, activation, ofm_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +void KernelGenerator::visit(const ir::operation::FullyConnected &node) +{ + using ir::operation::FullyConnected; + + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)}; + const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)}; + const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; + const auto activation = node.param().activation; + const auto weights_format = node.param().weights_format; + + auto output_tensor = _tensor_reg->getPortableTensor(output_index); + auto input_tensor = _tensor_reg->getPortableTensor(input_index); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index); + auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index); + + auto fn = std::make_unique<ops::FullyConnectedLayer>(); + + fn->configure(input_tensor, weight_tensor, bias_tensor, activation, weights_format, output_tensor, + _external_context); + + _return_fn = std::move(fn); +} + +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/KernelGenerator.h b/runtime/onert/backend/ruy/KernelGenerator.h new file mode 100644 index 000000000..0f6bd590a --- /dev/null +++ b/runtime/onert/backend/ruy/KernelGenerator.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ + +#include "ExternalContext.h" +#include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "Tensor.h" + +#include <backend/CustomKernelBuilder.h> +#include <backend/cpu_common/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +class KernelGenerator : public cpu_common::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, + const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, + const std::shared_ptr<ExternalContext> &external_context); + + void visit(const ir::OpSequence &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; + ir::Layout _current_layout; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/ruy/StaticTensorManager.h b/runtime/onert/backend/ruy/StaticTensorManager.h new file mode 100644 index 000000000..af2d25241 --- /dev/null +++ b/runtime/onert/backend/ruy/StaticTensorManager.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/ruy/Tensor.h b/runtime/onert/backend/ruy/Tensor.h new file mode 100644 index 000000000..60d0fbf77 --- /dev/null +++ b/runtime/onert/backend/ruy/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_H__
+#define __ONERT_BACKEND_RUY_TENSOR_H__
+
+#include <backend/cpu_common/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+using Tensor = cpu_common::Tensor;
+using ExternalTensor = cpu_common::ExternalTensor;
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_H__
diff --git a/runtime/onert/backend/ruy/TensorBuilder.cc b/runtime/onert/backend/ruy/TensorBuilder.cc
new file mode 100644
index 000000000..c77defc30
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+  : _tensor_reg{tensor_reg},
+    _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+  /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout layout)
+{
+  _tensor_info_map.emplace(ind, info);
+
+  // The ruy backend supports only the NHWC layout
+  assert(layout == ir::Layout::NHWC);
+  if (info.isDynamic())
+  {
+    _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+  }
+  else
+  {
+    _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+  }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+  const auto tensor_info = _tensor_info_map.at(ind);
+
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    const auto size = tensor_info.total_size();
+    _static_tensor_mgr->claimPlan(ind, size);
+  }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    _static_tensor_mgr->releasePlan(ind);
+  }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE For now nothing to do. Allocation is done in prepare stage, which is not appropriate
+  // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
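+  // In practice the static tensors have already been allocated by prepare() above,
+  // which calls allocateNonconsts() on the static tensor manager.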
+}
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/TensorBuilder.h b/runtime/onert/backend/ruy/TensorBuilder.h
new file mode 100644
index 000000000..91c07bd82
--- /dev/null
+++ b/runtime/onert/backend/ruy/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
+
+#include <backend/cpu_common/DynamicTensorManager.h>
+#include <backend/cpu_common/TensorRegistry.h>
+
+#include <ir/OperandIndexMap.h>
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+
+class TensorBuilder
+{
+public:
+  TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
+
+  /**
+   * @brief Register tensor information to allocate on ruy backend
+   * @param[in] ind Operand index
+   * @param[in] info Operand information
+   * @param[in] layout Operand data layout
+   */
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout);
+
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
+
+  bool isRegistered(const ir::OperandIndex &) const;
+
+  void prepare(void);
+  void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */ }
+
+  IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }
+
+private:
+  const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+  std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+  std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
+  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+};
+
+} // namespace ruy
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_RUY_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
new file mode 100644
index 000000000..d249b2ce3
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.cc
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "../Tensor.h"
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace ruy
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer()
+  : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
+    _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+    _dilationHeightFactor(1), _activation(ir::Activation::NONE),
+    _conv_kernel(new nnfw::ruy::Conv()), _prepare(false)
+{
+  // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer() = default;
+
+void ConvolutionLayer::convFloat32()
+{
+  float output_activation_min = 0, output_activation_max = 0;
+  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+  nnfw::ruy::ConvParams op_params;
+  op_params.padding_type = getPaddingType(_paddingType);
+  op_params.padding_values.width = _paddingLeft;
+  op_params.padding_values.height = _paddingTop;
+  op_params.stride_width = _strideWidth;
+  op_params.stride_height = _strideHeight;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  nnfw::ruy::Conv &kernel = *_conv_kernel;
+  kernel(op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+         getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
+         getTensorShape(_bias), reinterpret_cast<const float *>(_bias->buffer()),
+         getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()),
+         _external_context->ruy_context());
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                                 const IPortableTensor *bias, const ir::PaddingType paddingType,
+                                 const uint32_t paddingLeft, const uint32_t paddingRight,
+                                 const uint32_t paddingTop, const uint32_t paddingBottom,
+                                 const uint32_t strideWidth, const uint32_t strideHeight,
+                                 const uint32_t dilationWidthFactor,
+                                 const uint32_t dilationHeightFactor,
+                                 const ir::Activation activation, IPortableTensor *output,
+                                 const std::shared_ptr<ExternalContext> &external_context)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _paddingType = paddingType;
+  _paddingLeft = paddingLeft;
+  _paddingRight = paddingRight;
+  _paddingTop = paddingTop;
+  _paddingBottom = paddingBottom;
+  _strideWidth = strideWidth;
+  _strideHeight = strideHeight;
+  _dilationWidthFactor = dilationWidthFactor;
+  _dilationHeightFactor = dilationHeightFactor;
+  _activation = activation;
+  _output = output;
+  _external_context = external_context;
+}
+
+void ConvolutionLayer::run()
+{
+  prepare();
+
+  if (_input->is_dynamic() || _kernel->is_dynamic())
+  {
+    const auto ifm_shape = _input->getShape().asFeature(_input->layout());
+    const auto ofm_shape = _output->getShape().asFeature(_input->layout());
+    // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
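+    // The shapes of dynamic tensors can change between runs, so the explicit
+    // padding values are recomputed from the current shapes on every run.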
+    const auto ker_shape = _kernel->getShape();
+    const auto ker_height = ker_shape.dim(1);
+    const auto ker_width = ker_shape.dim(2);
+
+    ir::Stride stride;
+    stride.vertical = _strideHeight;
+    stride.horizontal = _strideWidth;
+
+    ir::Padding param_padding;
+    param_padding.type = _paddingType;
+    param_padding.param.left = _paddingLeft;
+    param_padding.param.right = _paddingRight;
+    param_padding.param.top = _paddingTop;
+    param_padding.param.bottom = _paddingBottom;
+
+    const auto padding =
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           _dilationWidthFactor, _dilationHeightFactor);
+
+    _paddingLeft = padding.left;
+    _paddingRight = padding.right;
+    _paddingTop = padding.top;
+    _paddingBottom = padding.bottom;
+  }
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    convFloat32();
+  }
+  else
+  {
+    throw std::runtime_error{"Conv: unsupported data type"};
+  }
+}
+
+void ConvolutionLayer::prepare()
+{
+  if (_prepare)
+    return;
+
+  nnfw::ruy::Conv &kernel = *_conv_kernel;
+  if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
+  {
+    kernel.prepare(getTensorShape(_input), getTensorShape(_kernel), getTensorShape(_output),
+                   _strideWidth, _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+  }
+  _prepare = true;
+}
+
+} // namespace ops
+} // namespace ruy
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/ruy/ops/ConvolutionLayer.h b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..a55387b93
--- /dev/null
+++ b/runtime/onert/backend/ruy/ops/ConvolutionLayer.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <ruy/operation/Conv.h> +#include <exec/IFunction.h> +#include <functional> +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class ConvolutionLayer : public ::onert::exec::IFunction +{ +public: + ConvolutionLayer(); + ~ConvolutionLayer(); + +public: + void convFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType _paddingType, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, + IPortableTensor *output, const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _paddingType; + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; + + ir::Activation _activation; + + std::unique_ptr<nnfw::ruy::Conv> _conv_kernel; + + bool _prepare; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_CONVOLUTIONLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..af693e3b4 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "FullyConnectedLayer.h" + +#include "../Tensor.h" +#include <ruy/operation/FullyConnected.h> +#include <ruy/TensorUtils.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), + _activation(ir::Activation::NONE), _external_context(nullptr) +{ + // DO NOTHING +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); + nnfw::ruy::FullyConnectedParams op_params; + + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + op_params.activation = convertActivationType(_activation); + op_params.lhs_cacheable = _weights->is_constant(); + op_params.rhs_cacheable = _input->is_constant(); + + nnfw::ruy::FullyConnected( + op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), + getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()), + getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr), + getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), + _external_context->ruy_context()); +} + +void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, + IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context) +{ + UNUSED_RELEASE(weights_format); + _input = input; + _weights = weights; + _bias = bias; + _activation = activation; + _output = output; + _external_context = external_context; +} + +void FullyConnectedLayer::run() +{ + if (_input->data_type() == OperandType::FLOAT32) + { + fullyConnectedFloat32(); + } + else + { + throw std::runtime_error{"FullyConnected: unsupported data type"}; + } +} + +void FullyConnectedLayer::prepare() +{ + if (_bias && _bias->is_constant()) + { + const int bias_size = getTensorShape(_bias).FlatSize(); + if (nnfw::ruy::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size)) + { + _bias = nullptr; + } + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h new file mode 100644 index 000000000..33d560f0b --- /dev/null +++ b/runtime/onert/backend/ruy/ops/FullyConnectedLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ +#define __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ + +#include <backend/IPortableTensor.h> +#include "../ExternalContext.h" +#include "OperationUtils.h" + +#include <exec/IFunction.h> + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +class FullyConnectedLayer : public ::onert::exec::IFunction +{ +public: + FullyConnectedLayer(); + ~FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void configure(const IPortableTensor *input, const IPortableTensor *weights, + const IPortableTensor *bias, ir::Activation activation, + ir::FullyConnectedWeightsFormat weights_format, IPortableTensor *output, + const std::shared_ptr<ExternalContext> &external_context); + + void run() override; + + void prepare() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_weights; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::Activation _activation; + + std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.cc b/runtime/onert/backend/ruy/ops/OperationUtils.cc new file mode 100644 index 000000000..929107b1a --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type) +{ + switch (ir_padding_type) + { + case ir::PaddingType::EXPLICIT: + return nnfw::ruy::PaddingType::kNone; + case ir::PaddingType::SAME: + return nnfw::ruy::PaddingType::kSame; + case ir::PaddingType::VALID: + return nnfw::ruy::PaddingType::kValid; + default: + throw std::runtime_error("Wrong padding type."); + break; + } +} + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h new file mode 100644 index 000000000..5dfdc7ec5 --- /dev/null +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ + +#include <backend/IPortableTensor.h> + +#include <ruy/Shape.h> +#include <ruy/Types.h> +#include <iostream> +#include <ir/DataType.h> +#include <ir/InternalType.h> +#include <ir/Padding.h> + +#include <limits> + +using OperandType = onert::ir::DataType; + +namespace onert +{ +namespace backend +{ +namespace ruy +{ +namespace ops +{ + +inline nnfw::ruy::Shape getTensorShape(const IPortableTensor *tensor) +{ + if (tensor == nullptr) + return nnfw::ruy::Shape(); + + const ir::Shape &shape = tensor->get_info().shape(); + + assert(tensor->layout() == ir::Layout::NHWC); + + auto rank = shape.rank(); + nnfw::ruy::Shape ret(rank); + auto data = ret.DimsData(); + for (int i = 0; i < rank; ++i) + { + data[i] = shape.dim(i); + } + return ret; +} + +inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Activation activation) +{ + switch (activation) + { + case ir::Activation::NONE: + return nnfw::ruy::FusedActivationFunctionType::kNone; + case ir::Activation::RELU: + return nnfw::ruy::FusedActivationFunctionType::kRelu; + case ir::Activation::RELU1: + return nnfw::ruy::FusedActivationFunctionType::kRelu1; + case ir::Activation::RELU6: + return nnfw::ruy::FusedActivationFunctionType::kRelu6; + case ir::Activation::TANH: + return nnfw::ruy::FusedActivationFunctionType::kTanh; + case ir::Activation::SIGMOID: + return nnfw::ruy::FusedActivationFunctionType::kSigmoid; + default: + throw std::runtime_error{"RUY backend: Cannot convert activation type"}; + } +} + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); + +} // namespace ops +} // namespace ruy +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/ruy/ruy.cc b/runtime/onert/backend/ruy/ruy.cc new file mode 100644 index 000000000..4f33590e9 --- /dev/null +++ b/runtime/onert/backend/ruy/ruy.cc @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Backend.h" + +extern "C" { + +onert::backend::Backend *onert_backend_create() { return new onert::backend::ruy::Backend; } + +void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; } +} diff --git a/runtime/onert/backend/xnnpack/Backend.h b/runtime/onert/backend/xnnpack/Backend.h new file mode 100644 index 000000000..b7aef1625 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Backend.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_H__ +#define __ONERT_BACKEND_XNNPACK_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> + newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb, + bool) const override + { + const auto &operands = graph.operands(); + const auto &operations = graph.operations(); + auto context = std::make_unique<BackendContext>(this, &graph); + auto tr = std::make_shared<cpu_common::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb, + context->external_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_BACKEND_H__ diff --git a/runtime/onert/backend/xnnpack/BackendContext.cc b/runtime/onert/backend/xnnpack/BackendContext.cc new file mode 100644 index 000000000..503d088aa --- /dev/null +++ b/runtime/onert/backend/xnnpack/BackendContext.cc @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+void BackendContext::initConsts()
+{
+  for (auto &op : operation_list())
+  {
+    constant_initializer->setLayout(op.layout);
+    graph()->operations().at(op.index).accept(*constant_initializer);
+  }
+
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    if (obj.isConstant() && !constant_initializer->exist(ind))
+    {
+      constant_initializer->registerDefaultInitializer(ind, obj);
+    }
+  }
+
+  constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                            const ir::OpSequences &op_seqs,
+                                            const ir::LowerInfoMap &lower_info)
+{
+  auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+                  ir::Remove::DUPLICATED;
+  for (auto index : operand_list())
+  {
+    if (model_io.contains(index))
+      continue;
+    const auto &obj = graph()->operands().at(index);
+    const auto frontend_layout = [&]() {
+      if (obj.getUses().size() == 0)
+        return ir::Layout::UNKNOWN;
+      auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+      for (auto &operation_info : operation_list())
+      {
+        if (operation_info.index == use_op_ind)
+          return operation_info.layout;
+      }
+      return ir::Layout::UNKNOWN;
+    }();
+    const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+    if (permute_factor.backend() != backend())
+      continue;
+    const auto backend_layout = permute_factor.layout();
+    ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                 obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+    tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+  }
+
+  // TODO Get compiler options from compiler, and use it rather than getting it from Env
+  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+  {
+    cpu_common::planTensors(*this, order, op_seqs, lower_info);
+  }
+  else
+  {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+    for (auto ind : operand_list())
+    {
+      if (tensor_builder->isRegistered(ind))
+        tensor_builder->notifyFirstUse(ind);
+    }
+  }
+
+  tensor_builder->prepare();
+
+  return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                       const ir::OpSequences &op_seqs)
+{
+  FunctionMap ret;
+
+  for (auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    bool assigned = [&]() {
+      for (auto op_info : operation_list())
+        if (op_seq.exist(op_info.index))
+          return true;
+      return false;
+    }();
+    if (!assigned)
+      continue;
+    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+    ret.emplace_back(op_seq_ind, std::move(fn_seq));
+  }
+
+  initConsts();
+
+  // NOTE For memory optimization, we want to free some operand data
+  for (auto ind : operand_list())
+  {
+    // TODO Remove const_cast
+    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+    obj.releaseData();
+  }
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+  }
+
+  return ret;
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/BackendContext.h b/runtime/onert/backend/xnnpack/BackendContext.h
new file mode 100644
index 000000000..f81175b9e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/BackendContext.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <util/ConfigSource.h>
+#include "TensorBuilder.h"
+#include "ConstantInitializer.h"
+#include "KernelGenerator.h"
+#include "ExternalContext.h"
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+  BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+                 std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
+                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::BackendContext(backend, graph, tensor_registry),
+      tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+      kernel_gen{kernel_gen}, _external_context(nullptr)
+  {
+    int num_threads = util::getConfigInt(util::config::XNNPACK_THREADS);
+    if (num_threads < 1)
+      num_threads = kDefaultNumThreadpoolThreads; // default num of threads
+    _external_context.reset(new ExternalContext(static_cast<size_t>(num_threads)));
+  }
+
+  ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                              const ir::OpSequences &op_seqs,
+                              const ir::LowerInfoMap &lower_info) override;
+
+  FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order,
+                         const ir::OpSequences &op_seqs) override;
+
+  std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+  void initConsts();
+  void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                   const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info);
+
+public:
+  // TODO Make it private
+  std::shared_ptr<TensorBuilder> tensor_builder;
+  std::shared_ptr<ConstantInitializer> constant_initializer;
+  std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/CMakeLists.txt b/runtime/onert/backend/xnnpack/CMakeLists.txt
new file mode 100644
index 000000000..e3de31e6f
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LIB_ONERT_BACKEND_XNNPACK onert_backend_xnnpack)
+
+# Unsupported architecture
+nnfw_find_package(Xnnpack QUIET)
+if(NOT Xnnpack_FOUND)
+  return()
+endif(NOT Xnnpack_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_XNNPACK} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE pthreadpool)
+target_link_libraries(${LIB_ONERT_BACKEND_XNNPACK} PRIVATE XNNPACK)
+
+set_target_properties(${LIB_ONERT_BACKEND_XNNPACK} PROPERTIES OUTPUT_NAME backend_xnnpack)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+  add_custom_command(TARGET ${LIB_ONERT_BACKEND_XNNPACK} POST_BUILD
+                     COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_XNNPACK}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_XNNPACK} DESTINATION lib)
diff --git a/runtime/onert/core/include/backend/IOptimizer.h b/runtime/onert/backend/xnnpack/Config.cc
index 4844d21b9..4d42a3f18 100644
--- a/runtime/onert/core/include/backend/IOptimizer.h
+++ b/runtime/onert/backend/xnnpack/Config.cc
@@ -14,38 +14,31 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_I_OPTIMIZER_H__
-#define __ONERT_BACKEND_I_OPTIMIZER_H__
+#include "Config.h"
 
-namespace onert
-{
-namespace ir
-{
-class LoweredGraph;
-}
-} // namespace onert
+#include <xnnpack.h>
 
 namespace onert
 {
 namespace backend
 {
+namespace xnnpack
+{
 
-/**
- * @brief Class for backend optimizations. This is an optional class so not all backends must have
- *        it.
- *
- */
-struct IOptimizer
+Config::~Config() { xnn_deinitialize(); }
+
+bool Config::initialize()
 {
-  virtual ~IOptimizer() = default;
-  /**
-   * @brief Run optimization
-   *
-   */
-  virtual void optimize() = 0;
-};
+  xnn_status status = xnn_initialize(nullptr /* allocator */);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to initialize XNNPACK"};
+  }
+  return true;
+}
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+} // namespace xnnpack
 } // namespace backend
 } // namespace onert
-
-#endif // __ONERT_BACKEND_I_OPTIMIZER_H__
diff --git a/runtime/onert/backend/xnnpack/Config.h b/runtime/onert/backend/xnnpack/Config.h
new file mode 100644
index 000000000..2cf7406e5
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/Config.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONFIG_H__
+#define __ONERT_BACKEND_XNNPACK_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class Config : public IConfig
+{
+public:
+  virtual ~Config();
+
+public:
+  std::string id() override { return "xnnpack"; }
+  bool initialize() override;
+  ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+  bool supportPermutation() override { return true; }
+  bool supportDynamicTensor() override { return true; }
+  bool supportFP16() override { return false; }
+
+  std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONFIG_H__
diff --git a/runtime/onert/backend/xnnpack/ConstantInitializer.h b/runtime/onert/backend/xnnpack/ConstantInitializer.h
new file mode 100644
index 000000000..45cdd8cd9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ConstantInitializer.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
+
+#include <backend/cpu_common/ConstantInitializer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+using ConstantInitializer = cpu_common::ConstantInitializer;
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.cc
index 88ffb502c..3a9fe1b55 100644
--- a/runtime/onert/core/include/backend/IExternalContext.h
+++ b/runtime/onert/backend/xnnpack/ExternalContext.cc
@@ -14,21 +14,23 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
-#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+#include "ExternalContext.h"
+
+#include <cassert>
 
 namespace onert
 {
 namespace backend
 {
+namespace xnnpack
+{
 
-struct IExternalContext
+ExternalContext::ExternalContext(size_t num_threads)
+  : _threadpool(pthreadpool_create(num_threads), pthreadpool_destroy)
 {
-  virtual ~IExternalContext() = default;
-  virtual void setMaxNumThreads(int) = 0;
-};
+  assert(_threadpool);
+}
 
+} // namespace xnnpack
 } // namespace backend
 } // namespace onert
-
-#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT__
diff --git a/runtime/onert/backend/xnnpack/ExternalContext.h b/runtime/onert/backend/xnnpack/ExternalContext.h
new file mode 100644
index 000000000..682fd2e4e
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ExternalContext.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
+
+#include <memory>
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class ExternalContext
+{
+public:
+  ExternalContext(size_t num_threads);
+
+public:
+  pthreadpool *getThreadPool() { return _threadpool.get(); }
+
+private:
+  std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> _threadpool;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_EXTERNAL_CONTEXT_H__
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
new file mode 100644
index 000000000..b7d3f60fb
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/ConvolutionLayer.h"
+#include "ops/DepthwiseConvolutionLayer.h"
+#include "ops/FullyConnectedLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+KernelGenerator::KernelGenerator(
+  const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+  const std::shared_ptr<TensorBuilder> &tensor_builder,
+  const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+  const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+  const std::shared_ptr<ExternalContext> &external_context)
+  : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
+    _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+    _current_layout(ir::Layout::UNKNOWN), _external_context(external_context)
+{
+  // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+  assert(!_return_fn_seq);
+  assert(_tensor_builder->dynamicTensorManager());
+  assert(_tensor_reg);
+
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
+
+  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+
+  // Prepare to handle dynamic tensors later
+  auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
+  {
+    dyn_ctx->op_seq = &op_seq;
+    dyn_ctx->operations = &_operations_ctx;
+    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
+    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+
+    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
+  }
+
+  _current_layout = op_seq.getLayout();
+  for (const auto &operation_idx : op_seq.operations())
+  {
+    const auto &node = _operations_ctx.at(operation_idx);
+    node.accept(*this);
+    _return_fn_seq->append(releaseFunction());
+
+    for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
+    {
+      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
+      if (portable_tensor)
+      {
+        assert(portable_tensor->layout() == ir::Layout::NHWC);
+      }
+
+      auto tensor = _tensor_reg->getNativeTensor(ind);
+      if (tensor)
+      {
+        tensor->increase_ref();
+      }
+    }
+  }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+  using ir::operation::Conv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+  const auto stride = node.param().stride;
+  const auto activation = node.param().activation;
+  const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
+  auto fn = std::make_unique<ops::ConvolutionLayer>(_external_context);
+
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
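+  // SAME/VALID padding from the IR is resolved into explicit padding values
+  // here, before the kernel is configured.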
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+
+  const auto padding =
+    ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                         dilation.width_factor, dilation.height_factor);
+
+  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  using ir::operation::DepthwiseConv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_layout);
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto dilation_width = node.param().dilation.width_factor;
+  const auto dilation_height = node.param().dilation.height_factor;
+  const auto param_padding = node.param().padding;
+  const auto padding = ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width,
+                                            ker_height, dilation_width, dilation_height);
+  const auto multiplier = node.param().multiplier;
+  const auto activation = node.param().activation;
+
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index);
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index);
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index);
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index);
+
+  auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(_external_context);
+
+  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+                multiplier, dilation_width, dilation_height, activation, ofm_tensor);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+  using ir::operation::FullyConnected;
+
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+  const auto activation = node.param().activation;
+
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
+  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
+  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);
+
+  auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
+
+  fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor);
+
+  _return_fn = std::move(fn);
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.h b/runtime/onert/backend/xnnpack/KernelGenerator.h
new file mode 100644
index 000000000..265824204
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
+
+#include "ExternalContext.h"
+#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "Tensor.h"
+
+#include <backend/CustomKernelBuilder.h>
+#include <backend/cpu_common/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+class KernelGenerator : public cpu_common::KernelGeneratorBase
+{
+public:
+  KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
+                  const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+                  const std::shared_ptr<ExternalContext> &external_context);
+
+  void visit(const ir::OpSequence &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+
+private:
+  const ir::Operands &_ctx;
+  const ir::Operations &_operations_ctx;
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
+  std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
+  ir::Layout _current_layout;
+  const std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/xnnpack/StaticTensorManager.h b/runtime/onert/backend/xnnpack/StaticTensorManager.h
new file mode 100644
index 000000000..f7344e8d8
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/StaticTensorManager.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ +#define __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ + +#include "backend/cpu_common/StaticTensorManager.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using StaticTensorManager = cpu_common::StaticTensorManager; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_STATICTENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/xnnpack/Tensor.h b/runtime/onert/backend/xnnpack/Tensor.h new file mode 100644 index 000000000..b39cbd266 --- /dev/null +++ b/runtime/onert/backend/xnnpack/Tensor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_H__ + +#include <backend/cpu_common/Tensor.h> +#include <ir/Data.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_H__ diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.cc b/runtime/onert/backend/xnnpack/TensorBuilder.cc new file mode 100644 index 000000000..b570144ce --- /dev/null +++ b/runtime/onert/backend/xnnpack/TensorBuilder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "TensorBuilder.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+  : _tensor_reg{tensor_reg},
+    _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+{
+  /* empty */
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout layout)
+{
+  _tensor_info_map.emplace(ind, info);
+
+  // The XNNPACK backend supports only the NHWC layout
+  assert(layout == ir::Layout::NHWC);
+  if (info.isDynamic())
+  {
+    _dynamic_tensor_mgr->buildTensor(ind, info, layout);
+  }
+  else
+  {
+    _static_tensor_mgr->buildTensor(ind, info, layout, info.isConstant());
+  }
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+  const auto tensor_info = _tensor_info_map.at(ind);
+
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    const auto size = tensor_info.total_size();
+    _static_tensor_mgr->claimPlan(ind, size);
+  }
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
+  {
+    _static_tensor_mgr->releasePlan(ind);
+  }
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
+
+void TensorBuilder::allocate()
+{
+  // NOTE For now there is nothing to do here. Allocation is done in the prepare stage,
+  // which is not ideal: CPU kernels require `ITensor`s to be allocated before kernel generation.
+}
+
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/TensorBuilder.h b/runtime/onert/backend/xnnpack/TensorBuilder.h
new file mode 100644
index 000000000..dddfedbf9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/TensorBuilder.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
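The TensorBuilder above routes dynamic tensors to the dynamic tensor manager and static ones to the claim/release planner, so peak memory is decided entirely by the order of notifyFirstUse/notifyLastUse calls. A toy stand-in for that protocol (not onert code; the naive bump allocator and operand indices are assumptions for illustration, and a real planner would also recycle released regions):

#include <cstdint>
#include <iostream>
#include <map>

int main()
{
  std::map<int, std::pair<uint32_t, uint32_t>> plan; // operand index -> (offset, size)
  uint32_t top = 0;
  auto claim = [&](int ind, uint32_t size) { plan[ind] = {top, size}; top += size; }; // firstUse
  auto release = [&](int ind) { (void)ind; /* a real planner would recycle the region */ };

  claim(0, 64);  // notifyFirstUse(#0): lifetime begins, offset reserved
  claim(1, 128); // notifyFirstUse(#1): lifetimes of #0 and #1 overlap
  release(0);    // notifyLastUse(#0): from here on #0's region could be reused
  claim(2, 32);  // notifyFirstUse(#2)
  release(1);
  release(2);

  std::cout << "planned bytes (naive): " << top << "\n"; // 224 with this bump allocator
  return 0;
}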
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ + +#include <backend/cpu_common/DynamicTensorManager.h> +#include <backend/cpu_common/TensorRegistry.h> + +#include <ir/OperandIndexMap.h> + +#include "StaticTensorManager.h" +#include "Tensor.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ + +class TensorBuilder +{ +public: + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); + + /** + * @brief Register tensor information to allocate on XNNPACK backend + * @param[in] ind Operand index + * @param[in] info Operand information + * @param[in] layout Operand data layout + */ + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout); + + void notifyFirstUse(const ir::OperandIndex &); + void notifyLastUse(const ir::OperandIndex &); + + bool isRegistered(const ir::OperandIndex &) const; + + void prepare(void); + void allocate(); + void postFunctionPrepare() { /* DO NOTHING */} + + IDynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); } + +private: + const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; + std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<StaticTensorManager> _static_tensor_mgr; + ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; +}; + +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc new file mode 100644 index 000000000..0612995c2 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+ConvolutionLayer::ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+    _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0),
+    _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
+                                 const IPortableTensor *bias, ir::PaddingType padding_type,
+                                 const uint32_t padding_left, const uint32_t padding_right,
+                                 const uint32_t padding_top, const uint32_t padding_bottom,
+                                 const uint32_t stride_width, const uint32_t stride_height,
+                                 const uint32_t dilation_width_factor,
+                                 const uint32_t dilation_height_factor,
+                                 const ir::Activation activation, IPortableTensor *output)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _padding_type = padding_type;
+  _padding_left = padding_left;
+  _padding_right = padding_right;
+  _padding_top = padding_top;
+  _padding_bottom = padding_bottom;
+  _stride_width = stride_width;
+  _stride_height = stride_height;
+  _dilation_width_factor = dilation_width_factor;
+  _dilation_height_factor = dilation_height_factor;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void ConvolutionLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 Convolution operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK Conv: unsupported data type"};
+  }
+}
+
+bool ConvolutionLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  // NHWC
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
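+  // Illustrative example (values assumed, not from any model): a 3x3 convolution
+  // taking 8 input channels to 16 output channels has kernel_shape [16, 3, 3, 8]
+  // in this OHWI order, i.e. kernel_height = 3, kernel_width = 3,
+  // output_channels = 16 and input_channels = 8.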
+  const auto &kernel_shape = _kernel->getShape();
+  uint32_t kernel_height = kernel_shape.dim(1);
+  uint32_t kernel_width = kernel_shape.dim(2);
+  uint32_t output_channels = kernel_shape.dim(0);
+  uint32_t input_channels = kernel_shape.dim(3);
+  assert(static_cast<uint32_t>(_input->getShape().dim(3)) == input_channels);
+  assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+
+  enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+    _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+    _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+    1 /* groups */, input_channels /* group_input_channels */,
+    output_channels /* group_output_channels */, input_channels /* input_channel_stride */,
+    output_channels /* output_channel_stride */,
+    reinterpret_cast<const float *>(_kernel->buffer()),
+    reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+    output_activation_max, 0, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 Convolution operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool ConvolutionLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t input_width = _input->getShape().dim(2);
+  uint32_t input_height = _input->getShape().dim(1);
+  uint32_t batch_size = _input->getShape().dim(0);
+  enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+    _kernel_op, batch_size, input_height, input_width,
+    reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 Convolution operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
new file mode 100644
index 000000000..6cbaa9f3a
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/ConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
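The layer splits XNNPACK's operator API in two: create() bakes weights, bias and hyper-parameters into an xnn_operator_t exactly once, while setup() binds the input/output pointers late and is retried from run(), because model I/O buffers may not exist yet at prepare time. The same call sequence can be exercised standalone; the sketch below reuses the XNNPACK entry points shown above, with toy shapes and a 1x1 identity kernel as assumptions (error handling elided):

#include <xnnpack.h>
#include <pthreadpool.h>
#include <cmath>
#include <vector>

int main()
{
  xnn_initialize(/*allocator=*/nullptr);
  pthreadpool_t pool = pthreadpool_create(/*threads_count=*/0); // 0 = use all cores

  // Assumed toy problem: NHWC input 1x4x4x1, 1x1 identity kernel, no padding.
  std::vector<float> kernel{1.0f}, bias{0.0f}, in(16, 2.0f), out(16, 0.0f);

  xnn_operator_t conv = nullptr;
  // "create": constants are captured once, as in ConvolutionLayer::create()
  xnn_create_convolution2d_nhwc_f32(
    /*padding top,right,bottom,left=*/0, 0, 0, 0, /*kernel h,w=*/1, 1, /*stride h,w=*/1, 1,
    /*dilation h,w=*/1, 1, /*groups=*/1, /*group_input_channels=*/1,
    /*group_output_channels=*/1, /*input_channel_stride=*/1, /*output_channel_stride=*/1,
    kernel.data(), bias.data(), /*output_min=*/-INFINITY, /*output_max=*/INFINITY,
    /*flags=*/0, &conv);

  // "setup" + "run": buffer binding is deferred, as in setup()/run()
  xnn_setup_convolution2d_nhwc_f32(conv, /*batch=*/1, /*height=*/4, /*width=*/4,
                                   in.data(), out.data(), pool);
  xnn_run_operator(conv, pool);

  xnn_delete_operator(conv);
  pthreadpool_destroy(pool);
  return 0;
}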
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class ConvolutionLayer : public Layer +{ +public: + ConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t dilation_width_factor, const uint32_t dilation_height_factor, + const ir::Activation activation, IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc new file mode 100644 index 000000000..947f04194 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+DepthwiseConvolutionLayer::DepthwiseConvolutionLayer(
+  const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0),
+    _padding_right(0), _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
+    _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void DepthwiseConvolutionLayer::configure(
+  const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
+  ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
+  const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
+  const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
+  const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
+{
+  _input = input;
+  _kernel = kernel;
+  _bias = bias;
+  _padding_type = padding_type;
+  _padding_left = padding_left;
+  _padding_right = padding_right;
+  _padding_top = padding_top;
+  _padding_bottom = padding_bottom;
+  _stride_width = stride_width;
+  _stride_height = stride_height;
+  _multiplier = multiplier;
+  _dilation_width_factor = dilation_width_factor;
+  _dilation_height_factor = dilation_height_factor;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void DepthwiseConvolutionLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
+  }
+}
+
+bool DepthwiseConvolutionLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  // NHWC
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
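+  // Illustrative mapping (values assumed): depthwise convolution is expressed to
+  // XNNPACK as a grouped convolution with groups = input_channels. With 8 input
+  // channels and multiplier 2 this becomes groups = 8, group_input_channels = 1,
+  // group_output_channels = 2, so output_channels = 16 and the kernel shape is
+  // [1, kernel_height, kernel_width, 16].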
+  const auto &kernel_shape = _kernel->getShape();
+  uint32_t kernel_height = kernel_shape.dim(1);
+  uint32_t kernel_width = kernel_shape.dim(2);
+  uint32_t output_channels = kernel_shape.dim(3);
+  uint32_t input_channels = _input->getShape().dim(3);
+  assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
+  assert(output_channels == input_channels * _multiplier);
+
+  enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
+    _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
+    _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
+    input_channels /* groups */, 1 /* group_input_channels */,
+    _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
+    output_channels /* output_channel_stride */,
+    reinterpret_cast<const float *>(_kernel->buffer()),
+    reinterpret_cast<const float *>(_bias->buffer()), output_activation_min,
+    output_activation_max, XNN_FLAG_DEPTHWISE_CONVOLUTION, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool DepthwiseConvolutionLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t input_width = _input->getShape().dim(2);
+  uint32_t input_height = _input->getShape().dim(1);
+  uint32_t batch_size = _input->getShape().dim(0);
+  enum xnn_status status = xnn_setup_convolution2d_nhwc_f32(
+    _kernel_op, batch_size, input_height, input_width,
+    reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
new file mode 100644
index 000000000..10f840ae7
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/DepthwiseConvolutionLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ + +#include "Layer.h" + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class DepthwiseConvolutionLayer : public Layer +{ +public: + DepthwiseConvolutionLayer(const std::shared_ptr<ExternalContext> external_context); + +public: + void configure(const IPortableTensor *input, const IPortableTensor *kernel, + const IPortableTensor *bias, ir::PaddingType padding_type, + const uint32_t padding_left, const uint32_t padding_right, + const uint32_t padding_top, const uint32_t padding_bottom, + const uint32_t stride_width, const uint32_t stride_height, + const uint32_t multiplier, const uint32_t dilation_width_factor, + const uint32_t dilation_height_factor, const ir::Activation activation, + IPortableTensor *output); + + void run() override; + + bool create() override; + bool setup() override; + +private: + const IPortableTensor *_input; + const IPortableTensor *_kernel; + const IPortableTensor *_bias; + IPortableTensor *_output; + + ir::PaddingType _padding_type; + uint32_t _padding_left; + uint32_t _padding_top; + uint32_t _padding_right; + uint32_t _padding_bottom; + + uint32_t _stride_width; + uint32_t _stride_height; + uint32_t _multiplier; + uint32_t _dilation_width_factor; + uint32_t _dilation_height_factor; + + ir::Activation _activation; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_DEPTHWISE_CONVOLUTION_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc new file mode 100644 index 000000000..d595fda36 --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "ir/Padding.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
+  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
+    _activation(ir::Activation::NONE)
+{
+  // DO NOTHING
+}
+
+void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
+                                    const IPortableTensor *bias, ir::Activation activation,
+                                    IPortableTensor *output)
+{
+  _input = input;
+  _kernel = weights;
+  _bias = bias;
+  _activation = activation;
+  _output = output;
+
+  // TODO Support layouts other than NHWC
+  assert(_input->layout() == ir::Layout::NHWC);
+
+  assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
+         _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
+}
+
+void FullyConnectedLayer::run()
+{
+  assert(_external_context && _external_context->getThreadPool());
+  if (!_setup)
+  {
+    _setup = setup();
+    assert(_setup);
+  }
+
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
+    if (status != xnn_status_success)
+    {
+      throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
+    }
+  }
+  else
+  {
+    throw std::runtime_error{"XNNPACK FC: unsupported data type"};
+  }
+}
+
+bool FullyConnectedLayer::create()
+{
+  float output_activation_min = 0.f, output_activation_max = 0.f;
+  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
+
+  const auto &kernel_shape = _kernel->getShape();
+  assert(kernel_shape.rank() == 2);
+  uint32_t output_channels = kernel_shape.dim(0);
+  uint32_t input_channels = kernel_shape.dim(1);
+
+  const auto &input_shape = _input->getShape();
+  const auto &output_shape = _output->getShape();
+  uint32_t flag = 0;
+  if (input_shape.rank() != output_shape.rank())
+  {
+    flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
+    assert(input_shape.num_elements() % input_channels == 0);
+  }
+  else
+  {
+    assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
+  }
+
+  assert(_kernel && _kernel->buffer());
+  const float *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
+  const float *bias_buffer = (_bias) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;
+
+  enum xnn_status status = xnn_create_fully_connected_nc_f32(
+    input_channels, output_channels, input_channels /* input stride */,
+    output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
+    output_activation_max, flag, &_kernel_op);
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
+  }
+  assert(_kernel_op != nullptr);
+  return true;
+}
+
+bool FullyConnectedLayer::setup()
+{
+  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
+  {
+    // it could be the model's input or output
+    return false;
+  }
+
+  uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
+  enum xnn_status status = xnn_setup_fully_connected_nc_f32(
+    _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
+    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
+  if (status != xnn_status_success)
+  {
+    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
+  }
+  return true;
+}
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
new file mode 100644
index 000000000..883607ef9
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
+
+#include "Layer.h"
+
+#include <xnnpack.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace xnnpack
+{
+namespace ops
+{
+
+class FullyConnectedLayer : public Layer
+{
+public:
+  FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);
+
+public:
+  void configure(const IPortableTensor *input, const IPortableTensor *_kernel,
+                 const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+
+  void run() override;
+
+  bool create() override;
+  bool setup() override;
+
+private:
+  const IPortableTensor *_input;
+  const IPortableTensor *_kernel;
+  const IPortableTensor *_bias;
+  IPortableTensor *_output;
+
+  ir::Activation _activation;
+};
+
+} // namespace ops
+} // namespace xnnpack
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
diff --git a/runtime/onert/backend/xnnpack/ops/Layer.h b/runtime/onert/backend/xnnpack/ops/Layer.h
new file mode 100644
index 000000000..68b610f33
--- /dev/null
+++ b/runtime/onert/backend/xnnpack/ops/Layer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ + +#include <exec/IFunction.h> +#include <backend/IPortableTensor.h> +#include "OperationUtils.h" +#include "../ExternalContext.h" +#include "../Tensor.h" + +#include <cassert> +#include <memory> + +#include <xnnpack.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +class Layer : public ::onert::exec::IFunction +{ +public: + Layer(const std::shared_ptr<ExternalContext> external_context) + : _kernel_op{nullptr}, _create{false}, _setup{false}, _external_context{external_context} + { + // DO NOTHING + } + + ~Layer() + { + if (_kernel_op) + xnn_delete_operator(_kernel_op); + } + +public: + void prepare() override + { + if (_create) + return; + + _create = create(); + assert(_create); + + _setup = setup(); + } + virtual bool create() = 0; + virtual bool setup() = 0; + +protected: + xnn_operator_t _kernel_op; + bool _create; + bool _setup; + const std::shared_ptr<ExternalContext> _external_context; +}; + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_LAYER_H__ diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h new file mode 100644 index 000000000..5102e32dd --- /dev/null +++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
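The prepare()/run() pair above implements a small state machine: create() runs exactly once, the eager setup() that follows it is allowed to fail while I/O buffers are still unbound, and run() retries setup() before executing. A minimal stand-alone analogue (names and the bind_io() hook are hypothetical; no XNNPACK types involved):

#include <cassert>

// Hypothetical, simplified analogue of ops::Layer's two-phase protocol.
class ToyLayer
{
public:
  void prepare()
  {
    if (_created)
      return;
    _created = create(); // one-time: bakes constant weights into the operator
    assert(_created);
    _ready = setup();    // may legitimately fail: I/O buffers not bound yet
  }

  void run()
  {
    if (!_ready)
      _ready = setup();  // retried once the runtime has bound the buffers
    assert(_ready);
    // kernel invocation (e.g. xnn_run_operator) would go here
  }

  void bind_io() { _io_bound = true; } // stands in for buffer allocation

private:
  bool create() { return true; }
  bool setup() { return _io_bound; }

  bool _created = false, _ready = false, _io_bound = false;
};

int main()
{
  ToyLayer l;
  l.prepare(); // create() succeeds; setup() fails quietly (no I/O buffers yet)
  l.bind_io(); // runtime attaches model input/output buffers
  l.run();     // setup() retried and succeeds, then the kernel runs
  return 0;
}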
+ */ + +#ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ +#define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ + +// duplicated from cpu/ops/OperationUtils.h +#include <ir/InternalType.h> +#include <ir/Padding.h> +#include <ir/DataType.h> + +namespace onert +{ +namespace backend +{ +namespace xnnpack +{ +namespace ops +{ + +using OperandType = ir::DataType; + +template <typename T> +void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0; + *activation_max = 6; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1; + *activation_max = 1; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0; + *activation_max = 1; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + } + else + { + throw std::runtime_error{"Unsupported fused activation function"}; + } +} + +} // namespace ops +} // namespace xnnpack +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__ diff --git a/runtime/onert/backend/xnnpack/xnnpack.cc b/runtime/onert/backend/xnnpack/xnnpack.cc new file mode 100644 index 000000000..38a6c5572 --- /dev/null +++ b/runtime/onert/backend/xnnpack/xnnpack.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
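CalculateActivationRange above is how fused activations disappear into the operators: the pair it produces is passed as output_min/output_max to the xnn_create_* calls, which clamp results in place. Restating two of its branches standalone for illustration (float case only; not the onert header):

#include <cassert>
#include <limits>

// Standalone restatement of two CalculateActivationRange branches (illustrative).
void relu6_range(float *min, float *max) { *min = 0.f; *max = 6.f; }
void none_range(float *min, float *max)
{
  *min = std::numeric_limits<float>::lowest(); // no clamping below
  *max = std::numeric_limits<float>::max();    // no clamping above
}

int main()
{
  float lo = 0.f, hi = 0.f;
  relu6_range(&lo, &hi); // fused RELU6 becomes the clamp [0, 6]
  assert(lo == 0.f && hi == 6.f);
  none_range(&lo, &hi);  // NONE leaves the full float range, i.e. a no-op clamp
  assert(hi == std::numeric_limits<float>::max());
  return 0;
}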
+ */
+
+#include "Backend.h"
+
+#include <util/logging.h>
+
+extern "C" {
+onert::backend::Backend *onert_backend_create()
+{
+  VERBOSE(onert_backend_create) << "'xnnpack' loaded\n";
+  return new onert::backend::xnnpack::Backend;
+}
+
+void onert_backend_destroy(onert::backend::Backend *backend)
+{
+  VERBOSE(onert_backend_destroy) << "'xnnpack' unloaded\n";
+  delete backend;
+}
+}
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index 1eba29550..4d212156a 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -19,6 +19,8 @@
 #include <memory>
 #include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "exec/FunctionSequence.h"
 
 namespace onert
 {
@@ -26,12 +28,10 @@ namespace backend
 {
 
 class Backend;
-class IConstantInitializer;
-class IKernelGenerator;
-class ITensorRegister;
 struct ITensorRegistry;
-struct ITensorBuilder;
-struct IOptimizer;
+
+using FunctionMap =
+  std::vector<std::pair<ir::OpSequenceIndex, std::unique_ptr<exec::FunctionSequence>>>;
 
 class BackendContext
 {
@@ -46,15 +46,8 @@ public:
 
 public:
   BackendContext(const Backend *backend, const ir::Graph *graph,
-                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
-                 std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
-                 std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
-                 std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
-                 std::shared_ptr<ITensorRegister> tensor_register = nullptr,
-                 std::shared_ptr<IOptimizer> optimizer = nullptr)
-    : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
-      tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
-      kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr)
+    : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}
   {
   }
 
@@ -66,8 +59,19 @@ public:
   const Backend *backend() const { return _backend; }
   const ir::Graph *graph() const { return _graph; }
 
-  const std::vector<OperationInfo> &operation_list() { return _operation_list; }
-  const std::vector<ir::OperandIndex> &operand_list() { return _operand_list; }
+  const std::vector<OperationInfo> &operation_list() const { return _operation_list; }
+  const std::vector<ir::OperandIndex> &operand_list() const { return _operand_list; }
+
+  virtual ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &,
+                                      const ir::OpSequences &, const ir::LowerInfoMap &)
+  {
+    return nullptr;
+  }
+  virtual FunctionMap genKernels(const std::vector<onert::ir::OpSequenceIndex> &,
+                                 const ir::OpSequences &)
+  {
+    return {};
+  }
 
 private:
   const Backend *_backend{nullptr};
@@ -77,11 +81,6 @@ private:
 
 public:
   std::shared_ptr<ITensorRegistry> tensor_registry;
-  std::shared_ptr<ITensorBuilder> tensor_builder;
-  std::shared_ptr<IConstantInitializer> constant_initializer;
-  std::shared_ptr<IKernelGenerator> kernel_gen;
-  std::shared_ptr<ITensorRegister> tensor_register;
-  std::shared_ptr<IOptimizer> optimizer;
 };
 
 using BackendContexts = std::unordered_map<const Backend *, std::unique_ptr<BackendContext>>;
diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h
deleted file mode 100644
index 97721cf19..000000000
--- a/runtime/onert/core/include/backend/ITensorBuilder.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ITENSOR_BUILDER_H__ -#define __ONERT_BACKEND_ITENSOR_BUILDER_H__ - -#include <map> - -#include "ir/Index.h" -#include "ir/OperandInfo.h" -#include "ir/Operation.h" -#include "ir/Layout.h" -#include "ITensor.h" -#include "ITensorManager.h" -#include "ITensorRegistry.h" -#include "IDynamicTensorManager.h" - -namespace onert -{ -namespace backend -{ - -struct ITensorBuilder -{ - using IterateFunction = std::function<void(const ir::OperandIndex &)>; - - virtual ~ITensorBuilder(void) = default; - - /** - * @brief Register tensor information to allocate on backend - * - * @param ind Index - * @param info Info - * @param backend_layout Backend layout - * @param as_const Whether this tensor is constant - */ - virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout) = 0; - - /** - * @brief Check if the tensor has been registered with @c registerTensorInfo - * - * @return true If the tensor has been registered - * @return false Otherwise - */ - virtual bool isRegistered(const ir::OperandIndex &) const = 0; - -public: // methods for static tensor allocation - /** - * @brief Let the tensor builder know first use(start of lifetime) of a tensor - * Must be called before calling @c prepare - * Must be run up to once for each tensor before calling @c notifyLastUse - * NOTE: Useful only for static models - */ - virtual void notifyFirstUse(const ir::OperandIndex &) = 0; - /** - * @brief Let the tensor builder know last use(end of lifetime) of a tensor - * Must be run up to once for each tensor after calling @c notifyFirstUse - * NOTE: Useful only for static models - */ - virtual void notifyLastUse(const ir::OperandIndex &) = 0; - /** - * @brief Prepare the tensors - * Before calling this, all the tensors must be registered - */ - virtual void prepare(void) = 0; - /** - * @brief Allocate the tensors - * Before calling this, @c prepare must be called - */ - virtual void allocate() = 0; - /** - * @brief Some actions after functions' @c IFunction::prepare method. - * This is called right after each function's @c IFunction::prepare function has been - * called. - */ - virtual void postFunctionPrepare() = 0; - -public: // methods for dynamic tensor allocation - /** - * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception - * will be thrown. 
- * - * @return pointer of IDynamicTensorManager object - * - * @note Since it is a pointer, its life time is from the cration of TensorBuilder - * to the end of execution - */ - virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; } -}; - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ITENSOR_BUILDER_H__ diff --git a/runtime/onert/core/include/backend/ITensorRegister.h b/runtime/onert/core/include/backend/ITensorRegister.h deleted file mode 100644 index b8e521ce3..000000000 --- a/runtime/onert/core/include/backend/ITensorRegister.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ITENSOR_REGISTER_H__ -#define __ONERT_BACKEND_ITENSOR_REGISTER_H__ - -#include "ir/LowerInfoMap.h" -#include "ITensorBuilder.h" -#include "ir/Layout.h" -#include "ir/OperandIndexSequence.h" -#include "ir/OperandInfo.h" -#include "ir/Operands.h" -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace backend -{ - -class ITensorRegister : public ir::OperationVisitor -{ -public: - virtual ~ITensorRegister() = default; - -public: - void registerTensors(const ir::OpSequence &op_seq, const ir::LowerInfoMap *lower_info_map) - { - _current_op_seq_layout = op_seq.getLayout(); - _lower_info_map = lower_info_map; - assert(_lower_info_map != nullptr); - assert(tensor_builder().get() != nullptr); - op_seq.accept(*this); - } - -protected: - virtual const ir::Operands &operands() const = 0; - virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0; - -protected: -#define OP(InternalName) \ - void visit(const ir::operation::InternalName &node) override \ - { \ - for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs()) \ - { \ - defaultRegisterTensorInfo(ind); \ - } \ - } -#include "ir/Operations.lst" -#undef OP - -protected: - void defaultRegisterTensorInfo(const ir::OperandIndex &index) const - { - if (tensor_builder()->isRegistered(index)) - { - return; - } - - const auto &obj = operands().at(index); - const auto frontend_layout = frontendLayout(); - const auto backend_layout = backendLayout(index); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder()->registerTensorInfo(index, backend_info, backend_layout); - } - -protected: - ir::Layout frontendLayout() const { return _current_op_seq_layout; } - ir::Layout backendLayout(const ir::OperandIndex &index) const - { - assert(_lower_info_map != nullptr); - const auto lower_info = _lower_info_map->operand.at(index).get(); - return lower_info->def_factors().getOnlyElement().layout(); - } - -private: - ir::Layout _current_op_seq_layout; - const ir::LowerInfoMap *_lower_info_map{nullptr}; -}; - -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ITENSOR_REGISTER_H__ diff --git 
a/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h
new file mode 100644
index 000000000..19e7b7c99
--- /dev/null
+++ b/runtime/onert/core/include/backend/cpu_common/BackendContextHelpers.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+#define __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
+
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/OpSequences.h"
+#include "ir/LowerInfoMap.h"
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+// TODO Remove the template param BackendContext once unification of cpu backend context is done
+template <typename T_BackendContext>
+void planTensors(const T_BackendContext &ctx, const std::vector<onert::ir::OpSequenceIndex> &order,
+                 const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info)
+{
+  auto graph = ctx.graph();
+  auto tensor_builder = ctx.tensor_builder;
+
+  ir::OperandIndexMap<uint32_t> uses_map;
+  ir::OperandIndexMap<uint32_t> def_map;
+  ir::OperandIndexSequence constants;
+
+  auto model_io =
+    (graph->getInputs() + graph->getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;
+
+  // Prepare scanning
+  for (auto ind : ctx.operand_list())
+  {
+    if (model_io.contains(ind))
+      continue;
+    const auto &obj = graph->operands().at(ind);
+    const auto &li = lower_info.operand.at(ind);
+    if (li->def_factors().getOnlyElement().backend() != ctx.backend())
+      continue;
+
+    // Ignore unused tensor
+    if (li->def_factors().size() == 0 && li->use_factors().size() == 0)
+    {
+      VERBOSE_F() << "Operand #" << ind.value() << " will not be used. no more process."
+                  << std::endl;
+      continue;
+    }
+
+    uses_map[ind] = obj.getUses().size();
+    def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+    if (obj.isConstant())
+      constants.append(ind);
+
+    auto factor = li->def_factors().getOnlyElement();
+    if (!tensor_builder->isRegistered(ind))
+    {
+      // These tensors do not exist in any op_seq (No use and def)
+      const auto info = obj.info();
+      const auto backend_layout = factor.layout();
+      // TODO Change tensor info to have permuted shape
+      tensor_builder->registerTensorInfo(ind, info, backend_layout);
+    }
+  }
+
+  // Start scanning to do notify{First|Last}Use for each tensor
+
+  // If a tensor is a constant, increase its use count and allocate it first.
+  // Increasing the use count here means the tensor is never deallocated during scanning,
+  // i.e. constants are deallocated last.
+  for (const auto &ind : constants)
+  {
+    uses_map[ind]++;
+    tensor_builder->notifyFirstUse(ind);
+  }
+
+  // At each operation,
+  // 1. Scan DEF of outputs. If the tensor is defined here, allocate it
+  // 2. Scan DEF of inputs. If it is a variable tensor, allocate it
+  // 3. Scan USE of inputs. Decrease the USE count and deallocate if the USE count reaches 0
+  for (const auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    for (const auto &op_idx : op_seq.operations())
+    {
+      auto op_inputs = graph->operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED |
+                       ir::Remove::UNDEFINED;
+      auto op_outputs = graph->operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED |
+                        ir::Remove::UNDEFINED;
+
+      // Define outputs
+      for (const auto &ind : op_outputs)
+      {
+        if (model_io.contains(ind))
+          continue;
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(def_map.find(ind) != def_map.end());
+        if (def_map[ind])
+        {
+          def_map[ind] = 0;
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      // Scan variable tensors
+      // These tensors have constant-like features, but OperandInfo and LowerInfo treat them as
+      // non-constant so that memory planning here can reduce memory usage
+      for (const auto &ind : op_inputs)
+      {
+        if (model_io.contains(ind))
+          continue;
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        const auto &operand = graph->operands().at(ind);
+        if (operand.info().isVariable())
+        {
+          // The variable tensor with buffer is not supported yet
+          assert(operand.data() == nullptr);
+          assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+          assert(lower_info.operand.at(ind)->def_factors().size() == 1 &&
+                 lower_info.operand.at(ind)->use_factors().size() == 1);
+          assert(uses_map[ind] == 1 && def_map[ind] == 0);
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      for (const auto &ind : op_inputs)
+      {
+        if (model_io.contains(ind))
+          continue;
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(uses_map.find(ind) != uses_map.end());
+        assert(uses_map[ind] > 0);
+        uses_map[ind]--;
+        if (uses_map[ind] == 0)
+        {
+          // plan for deallocation of static tensor node
+          tensor_builder->notifyLastUse(ind);
+
+          // plan for deallocation of dynamic tensor
+          auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
+          auto *tensor = ctx.tensor_registry->getITensor(ind);
+          assert(tensor);
+          dyn_tensor_manager->planDealloc(op_idx, tensor);
+        }
+      }
+    }
+  }
+
+  // Dispose and validate
+  for (const auto &ind : constants)
+  {
+    --uses_map[ind];
+    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+    {
+      tensor_builder->notifyLastUse(ind);
+    }
+  }
+
+  assert(
+    std::all_of(uses_map.begin(), uses_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+  assert(
+    std::all_of(def_map.begin(), def_map.end(),
+                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+}
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_BACKEND_CONTEXT_HELPERS_H__
diff --git a/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h
new file mode 100644
index 000000000..679355599
--- /dev/null
+++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
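planTensors reduces lifetime planning to two counters per operand: def_map flips to zero when the producing operation is reached (triggering notifyFirstUse), uses_map counts down as consumers execute until zero triggers notifyLastUse, and constants receive one artificial extra use so they are released only in the final sweep. A toy trace for an assumed two-operation chain (operand indices hypothetical; model-I/O skipping omitted):

#include <iostream>
#include <map>

// Toy re-enactment of the use/def counting in planTensors (illustrative only).
int main()
{
  // Assumed graph: #0 constant weight, #1 = op1(model input), #2 = op2(#1, #0).
  std::map<int, int> uses{{0, 1}, {1, 1}, {2, 0}};

  auto first_use = [](int i) { std::cout << "notifyFirstUse(#" << i << ")\n"; };
  auto use = [&](int i) {
    if (--uses[i] == 0)
      std::cout << "notifyLastUse(#" << i << ")\n";
  };

  uses[0]++;    // constants get an extra use up front...
  first_use(0); // ...and are allocated before anything else

  first_use(1); // op1 defines #1
  first_use(2); // op2 defines #2 ...
  use(1);       // ... and consumes #1 -> notifyLastUse(#1)
  use(0);       // ... and consumes #0 (count 2 -> 1, still alive)

  use(0);       // final sweep drops the artificial use -> notifyLastUse(#0)
  return 0;
}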
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
+
+#include "TensorRegistry.h"
+
+#include "ConstantInitializerBase.h"
+#include <ir/Operands.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu_common
+{
+
+class ConstantInitializer : public ConstantInitializerBase
+{
+public:
+  ConstantInitializer(const ir::Operands &operands,
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+  void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+  // TODO: For now only the cpu backend supports constant tensors that use external data.
+  // If other backends come to support this (ExternalTensor would probably need to be
+  // abstracted, e.g. behind an IExternal interface), this could become an interface of
+  // cpu_common::ConstantInitializerBase
+  void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+private:
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
+
+private:
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace cpu_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h
index 149acecb4..d4c65de38 100644
--- a/runtime/onert/core/include/backend/IConstantInitializer.h
+++ b/runtime/onert/core/include/backend/cpu_common/ConstantInitializerBase.h
@@ -14,20 +14,21 @@
  * limitations under the License.
*/ -#ifndef __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ -#define __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ +#ifndef __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ +#define __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ #include <unordered_map> #include <functional> -#include "ITensorBuilder.h" #include "ir/Coordinates.h" #include "ir/Layout.h" #include "ir/Operand.h" #include "ir/Operands.h" #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" +#include "backend/ITensorRegistry.h" #include "util/logging.h" +#include "backend/ITensorRegistry.h" namespace { @@ -153,11 +154,13 @@ namespace onert { namespace backend { +namespace cpu_common +{ -class IConstantInitializer : public ir::OperationVisitor +class ConstantInitializerBase : public ir::OperationVisitor { public: - virtual ~IConstantInitializer() = default; + virtual ~ConstantInitializerBase() = default; public: void run() @@ -178,15 +181,15 @@ public: } public: - IConstantInitializer(const ir::Operands &operands) - : _operands{operands}, _current_op_seq_layout{ir::Layout::UNKNOWN} + ConstantInitializerBase(const ir::Operands &operands) + : _operands{operands}, _current_layout{ir::Layout::UNKNOWN} { } public: using Initializer = std::function<void(const ir::Operand &, backend::ITensor &)>; - void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; } + void setLayout(ir::Layout layout) { _current_layout = layout; } protected: virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0; @@ -221,10 +224,11 @@ public: protected: const ir::Operands &_operands; std::unordered_map<ir::OperandIndex, Initializer> _init_map; - ir::Layout _current_op_seq_layout; // TODO Rename this to _current_layout + ir::Layout _current_layout; }; +} // namespace cpu_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ICONSTANT_INITIALIZER_H__ +#endif // __ONERT_BACKEND_CPU_COMMON_CONSTANT_INITIALIZER_BASE_H__ diff --git a/runtime/onert/core/include/backend/IKernelGenerator.h b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h index afc34ec21..49a589768 100644 --- a/runtime/onert/core/include/backend/IKernelGenerator.h +++ b/runtime/onert/core/include/backend/cpu_common/KernelGeneratorBase.h @@ -14,28 +14,30 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_IKERNEL_GENERATOR_H__ -#define __ONERT_BACKEND_IKERNEL_GENERATOR_H__ +#ifndef __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ +#define __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ #include <assert.h> #include <memory> #include <functional> -#include "ITensorBuilder.h" #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" #include <memory> #include "exec/FunctionSequence.h" +#include "backend/ITensorRegistry.h" namespace onert { namespace backend { +namespace cpu_common +{ -class IKernelGenerator : public ir::OperationVisitor +class KernelGeneratorBase : public ir::OperationVisitor { public: - virtual ~IKernelGenerator() = default; + virtual ~KernelGeneratorBase() = default; std::unique_ptr<exec::IFunction> releaseFunction() { @@ -70,7 +72,8 @@ protected: std::unique_ptr<exec::FunctionSequence> _return_fn_seq; // TODO Extract this out }; +} // namespace cpu_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_IKERNEL_GENERATOR_H__ +#endif // __ONERT_BACKEND_CPU_COMMON_KERNEL_GENERATOR_BASE_H__ diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h index fa50b551e..850bcf2f2 100644 --- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h @@ -17,9 +17,11 @@ #ifndef __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CPU_COMMON_STATICTENSOR_MANAGER_H__ -#include "MemoryManager.h" - #include "backend/IStaticTensorManager.h" +#include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/MemoryManager.h" +#include "backend/cpu_common/TensorRegistry.h" +#include "backend/ITensorManager.h" #include "ir/OperandIndexMap.h" #include "ir/OperandInfo.h" #include "TensorRegistry.h" @@ -37,12 +39,10 @@ class StaticTensorManager : public backend::IStaticTensorManager { public: StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, - DynamicMemoryManager *dynamic_mem_mgr); + DynamicTensorManager *dynamic_tensor_manager); virtual ~StaticTensorManager() = default; - void allocateConsts(void); void allocateNonconsts(void); - void deallocateConsts(void); void deallocateNonconsts(void); void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, @@ -54,11 +54,10 @@ public: void iterate(const std::function<void(const ir::OperandIndex &)> &fn); private: - std::unique_ptr<DynamicMemoryManager> _const_mgr; std::unique_ptr<MemoryManager> _nonconst_mgr; const std::shared_ptr<TensorRegistry> _tensors; ir::OperandIndexMap<bool> _as_constants; - DynamicMemoryManager *_dynamic_mem_mgr; + DynamicTensorManager *_dynamic_tensor_manager; }; } // namespace cpu_common diff --git a/runtime/onert/core/include/backend/cpu_common/Tensor.h b/runtime/onert/core/include/backend/cpu_common/Tensor.h index 5fa20e15d..5fbf4e729 100644 --- a/runtime/onert/core/include/backend/cpu_common/Tensor.h +++ b/runtime/onert/core/include/backend/cpu_common/Tensor.h @@ -21,6 +21,7 @@ #include <backend/IPortableTensor.h> #include <ir/OperandInfo.h> +#include <ir/Data.h> namespace onert { @@ -177,6 +178,91 @@ private: std::shared_ptr<Allocator> _allocator; }; +/** + * @brief Class that uses data from external memory that is not managed by a backend + * instead of allocating and copying the data. ExternalTensor's data pointer points to + * an address of memory such as where memory is already allocated, or mmapped area. 
+ * This means that ExternalTensor can take ir::Data of any type.
+ * To support this, the following are assumed: no padding, always NHWC layout,
+ * constant tensor, and not dynamic.
+ */
+class ExternalTensor : public Tensor
+{
+public:
+  ExternalTensor() = delete;
+  virtual ~ExternalTensor();
+
+public:
+  ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+    : Tensor(info, layout, nullptr)
+  {
+    assert(_layout == ir::Layout::NHWC);
+    assert(_info.isConstant());
+    assert(_info.isDynamic() == false);
+  }
+
+public:
+  /**
+   * @brief set Data to be shared from external memory so that this ExternalTensor is not
+   *        allocated on the CPU backend
+   * @param[in] data data of Operand to be set
+   */
+  void setData(const std::shared_ptr<ir::Data> data)
+  {
+    assert(data != nullptr);
+    _data = data;
+    // Note: some ops such as cker::Conv may receive the buffer as nullptr;
+    // that's why _buffer is kept in sync as well
+    _buffer = const_cast<uint8_t *>(_data->base());
+  }
+
+public:
+  uint8_t *buffer() const override { return _buffer; }
+
+  bool is_constant() const override { return true; }
+  bool is_dynamic() const override { return false; }
+  void set_dynamic() override
+  {
+    throw std::runtime_error("This tensor does not support changing dynamic");
+  }
+
+  void setShape(const ir::Shape &) override
+  {
+    throw std::runtime_error("This tensor does not support changing shape");
+  }
+
+  void increase_ref() override { ++_num_references; }
+
+  void decrease_ref() override
+  {
+    assert(_data != nullptr);
+    assert(_num_references > 0);
+    --_num_references;
+    if (_num_references == 0)
+    {
+      _data.reset();
+      _buffer = nullptr;
+    }
+  }
+
+  /**
+   * @brief Reset reference count to zero and release data
+   */
+  void reset_ref() override
+  {
+    assert(_data != nullptr);
+    assert(_num_references > 0);
+    _num_references = 0;
+
+    _data.reset();
+    _buffer = nullptr;
+  }
+
+  int32_t num_references() override { return _num_references; }
+
+private:
+  std::shared_ptr<const ir::Data> _data;
+};
 } // namespace cpu_common
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index af13d13f7..7850e21eb 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -34,7 +34,7 @@ class BackendManager
 public:
   using backend_create_t = backend::Backend *(*)();
   using backend_destroy_t = void (*)(backend::Backend *);
-  using dlhandle_destroy_t = void (*)(void *);
+  using dlhandle_destroy_t = std::function<void(void *)>;
 
   static BackendManager &get();
 
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 3098be7ba..68b862d58 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -24,6 +24,7 @@
 
 #include "ir/Graph.h"
 #include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
 
 namespace onert
 {
@@ -48,7 +49,6 @@ struct CompilerOptions
 {
   // GENERAL OPTIONS
   std::vector<std::string> backend_list;
-  bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
 
   // OPTIONS ONLY FOR DEBUGGING/PROFILING
   std::string trace_filepath; //< File path to save trace records
@@ -60,6 +60,8 @@ struct CompilerOptions
   bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
   bool disable_compile;   //< Run with Interpreter if true, try compilation otherwise
   bool fp16_enable;       //< Whether fp16 mode ON/OFF
+
+ 
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index af13d13f7..7850e21eb 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -34,7 +34,7 @@ class BackendManager
 public:
   using backend_create_t = backend::Backend *(*)();
   using backend_destroy_t = void (*)(backend::Backend *);
-  using dlhandle_destroy_t = void (*)(void *);
+  using dlhandle_destroy_t = std::function<void(void *)>;
 
   static BackendManager &get();
 
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 3098be7ba..68b862d58 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -24,6 +24,7 @@
 
 #include "ir/Graph.h"
 #include "exec/IExecutor.h"
+#include "util/TracingCtx.h"
 
 namespace onert
 {
@@ -48,7 +49,6 @@ struct CompilerOptions
 {
   // GENERAL OPTIONS
   std::vector<std::string> backend_list;
-  bool is_primary_subgraph; // TODO Remove this out of this struct as it is not user-given option
 
   // OPTIONS ONLY FOR DEBUGGING/PROFILING
   std::string trace_filepath; //< File path to save trace records
@@ -60,6 +60,8 @@ struct CompilerOptions
   bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
   bool disable_compile;   //< Run with Interpreter if true, try compilation otherwise
   bool fp16_enable;       //< Whether fp16 mode ON/OFF
+
+  util::TracingCtx *tracing_ctx; //< Profiling information
 };
 
 CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
 
@@ -73,8 +75,9 @@ public:
   /**
    * @brief Construct a new Compiler object
    * @param[in] subgs All subgraphs of a model
+   * @param[in] tracing_ctx Profiling information
    */
-  Compiler(const std::shared_ptr<ir::Subgraphs> &subgs);
+  Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
 
 public:
   /**
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index aadba6857..f115ab9a8 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -67,8 +67,7 @@ private:
                             const compiler::BackendResolver &backend_resolver);
 
   void manipulateLowerInfo(
-    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
-    bool is_primary);
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info);
   void dumpLowerInfo();
   bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
                  ir::Layout layout, const compiler::BackendResolver &backend_resolver);
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index 05f2679fc..33a2f62d9 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -68,7 +68,7 @@ private:
 
 private:
   // TODO Define visitors for operations. List them in alphabetic order.
-  void visit(const ir::operation::ArgMax &op) override;
+  void visit(const ir::operation::ArgMinMax &op) override;
   void visit(const ir::operation::BatchMatMul &op) override;
   void visit(const ir::operation::BCQFullyConnected &op) override;
   void visit(const ir::operation::BCQGather &op) override;
diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h
index d2eb83159..1f3a13b06 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInferer.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h
@@ -49,7 +49,7 @@ public:
 
 public:
   // TODO Define visitors for operations. List them in alphabetic order.
// Remove TODO when any op starting from the alphabet is added - void visit(const ir::operation::ArgMax &op) override; + void visit(const ir::operation::ArgMinMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; void visit(const ir::operation::BCQFullyConnected &op) override; void visit(const ir::operation::BCQGather &op) override; diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index 1d2831dd0..345bec8eb 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++ b/runtime/onert/core/include/exec/IExecutor.h @@ -18,17 +18,32 @@ * @file IExecutor.h * @brief This file defines interface of Executor */ -#ifndef __ONERT_EXEC_I_EXECUTOR_H_ -#define __ONERT_EXEC_I_EXECUTOR_H_ +#ifndef __ONERT_EXEC_I_EXECUTOR_H__ +#define __ONERT_EXEC_I_EXECUTOR_H__ #include "ir/Graph.h" #include "IFunction.h" #include "IODescription.h" +#include "ir/Index.h" #include "ir/OperationIndexMap.h" -#include "backend/IDynamicTensorManager.h" + +#include <cstdint> +#include <memory> +#include <unordered_map> namespace onert { +namespace backend +{ +class IPortableTensor; +namespace controlflow +{ +class IOTensor; +} +} +} +namespace onert +{ namespace exec { class IExecutionObserver; @@ -60,11 +75,29 @@ struct IExecutor virtual void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) = 0; /** - * @brief Start execution + * @brief Execute with user-given input/output description (for primary subgraph) * @param[in] desc Input and output description * @note This method should be thread-safe */ virtual void execute(const IODescription &desc) = 0; + + /** + * @brief Execute with given input/output tensors + * + * For non-primary subgraphs, input and output tensors must be given. + * + * @param[in] inputs tensors that are passed as inputs + * @param[in] outputs tensors that are passed as outputs + */ + virtual void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) = 0; + + /** + * @brief Get output tensor objects + * + * @return Vector of @c IOTensor + */ + virtual const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const = 0; }; using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>; @@ -72,4 +105,4 @@ using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecu } // namespace exec } // namespace onert -#endif // __ONERT_EXEC_I_EXECUTOR_H_ +#endif // __ONERT_EXEC_I_EXECUTOR_H__ diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h index 9f09de3fb..e77c308ea 100644 --- a/runtime/onert/core/include/ir/DataType.h +++ b/runtime/onert/core/include/ir/DataType.h @@ -37,6 +37,7 @@ enum class DataType INT64 = 8, QUANT_INT8_ASYMM = 9, QUANT_INT16_ASYMM = 10, + QUANT_INT8_SYMM_PER_CHANNEL = 11, }; size_t sizeOfDataType(DataType data_type); diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 1f20ee665..45fadc474 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -17,69 +17,69 @@ // This file has no ifdef guard intentionally #include "ir/operation/AddN.h" +#include "ir/operation/ArgMinMax.h" +#include "ir/operation/BatchMatMul.h" #include "ir/operation/BatchToSpaceND.h" +#include "ir/operation/BCQFullyConnected.h" +#include "ir/operation/BCQGather.h" #include "ir/operation/BinaryArithmetic.h" #include 
"ir/operation/BroadcastTo.h" -#include "ir/operation/Conv2D.h" -#include "ir/operation/Pool2D.h" +#include "ir/operation/Comparison.h" #include "ir/operation/Concat.h" -#include "ir/operation/Reshape.h" -#include "ir/operation/Fill.h" -#include "ir/operation/FullyConnected.h" -#include "ir/operation/Softmax.h" -#include "ir/operation/Transpose.h" -#include "ir/operation/Permute.h" -#include "ir/operation/Reduce.h" +#include "ir/operation/Conv2D.h" +#include "ir/operation/ConvertFp16ToFp32.h" +#include "ir/operation/ConvertFp32ToFp16.h" +#include "ir/operation/Custom.h" +#include "ir/operation/DepthToSpace.h" #include "ir/operation/DepthwiseConv2D.h" -#include "ir/operation/Slice.h" -#include "ir/operation/StridedSlice.h" -#include "ir/operation/Squeeze.h" +#include "ir/operation/Einsum.h" #include "ir/operation/ElementwiseActivation.h" #include "ir/operation/ElementwiseBinary.h" #include "ir/operation/ElementwiseUnary.h" +#include "ir/operation/EmbeddingLookup.h" #include "ir/operation/ExpandDims.h" -#include "ir/operation/Comparison.h" +#include "ir/operation/Fill.h" +#include "ir/operation/FullyConnected.h" +#include "ir/operation/FusedBatchNorm.h" +#include "ir/operation/Gather.h" +#include "ir/operation/HashtableLookup.h" +#include "ir/operation/If.h" +#include "ir/operation/InstanceNorm.h" +#include "ir/operation/L2Normalization.h" +#include "ir/operation/LocalResponseNormalization.h" +#include "ir/operation/LogSoftmax.h" #include "ir/operation/LSTM.h" +#include "ir/operation/MatrixBandPart.h" +#include "ir/operation/OneHot.h" +#include "ir/operation/Pack.h" +#include "ir/operation/Pad.h" +#include "ir/operation/Permute.h" +#include "ir/operation/Pool2D.h" +#include "ir/operation/Pow.h" +#include "ir/operation/PReLU.h" +#include "ir/operation/Range.h" +#include "ir/operation/Rank.h" +#include "ir/operation/Reduce.h" +#include "ir/operation/Reshape.h" #include "ir/operation/ResizeBilinear.h" #include "ir/operation/ResizeNearestNeighbor.h" #include "ir/operation/Reverse.h" #include "ir/operation/RNN.h" +#include "ir/operation/Select.h" +#include "ir/operation/Shape.h" +#include "ir/operation/Slice.h" +#include "ir/operation/Softmax.h" #include "ir/operation/SpaceToBatchND.h" #include "ir/operation/SpaceToDepth.h" -#include "ir/operation/EmbeddingLookup.h" -#include "ir/operation/L2Normalization.h" -#include "ir/operation/HashtableLookup.h" -#include "ir/operation/InstanceNorm.h" -#include "ir/operation/PReLU.h" -#include "ir/operation/TransposeConv.h" -#include "ir/operation/SquaredDifference.h" -#include "ir/operation/TopKV2.h" -#include "ir/operation/Gather.h" -#include "ir/operation/ArgMax.h" -#include "ir/operation/LocalResponseNormalization.h" -#include "ir/operation/DepthToSpace.h" -#include "ir/operation/Pack.h" -#include "ir/operation/Select.h" #include "ir/operation/Split.h" #include "ir/operation/SplitV.h" +#include "ir/operation/SquaredDifference.h" +#include "ir/operation/Squeeze.h" +#include "ir/operation/StatelessRandomUniform.h" +#include "ir/operation/StridedSlice.h" +#include "ir/operation/Tile.h" +#include "ir/operation/TopKV2.h" +#include "ir/operation/Transpose.h" +#include "ir/operation/TransposeConv.h" #include "ir/operation/Unpack.h" -#include "ir/operation/Pad.h" -#include "ir/operation/Custom.h" -#include "ir/operation/Einsum.h" -#include "ir/operation/OneHot.h" -#include "ir/operation/Shape.h" -#include "ir/operation/ConvertFp32ToFp16.h" -#include "ir/operation/ConvertFp16ToFp32.h" -#include "ir/operation/If.h" #include "ir/operation/While.h" -#include 
"ir/operation/Pow.h" -#include "ir/operation/Tile.h" -#include "ir/operation/Range.h" -#include "ir/operation/Rank.h" -#include "ir/operation/BCQFullyConnected.h" -#include "ir/operation/BCQGather.h" -#include "ir/operation/MatrixBandPart.h" -#include "ir/operation/BatchMatMul.h" -#include "ir/operation/FusedBatchNorm.h" -#include "ir/operation/LogSoftmax.h" -#include "ir/operation/StatelessRandomUniform.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index ccde4d179..7f3c40b4b 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -20,69 +20,69 @@ // Internal Name OP(AddN) +OP(ArgMinMax) +OP(BatchMatMul) OP(BatchToSpaceND) +OP(BCQFullyConnected) +OP(BCQGather) OP(BinaryArithmetic) OP(BroadcastTo) +OP(Comparison) +OP(Concat) OP(Conv2D) +OP(ConvertFp16ToFp32) +OP(ConvertFp32ToFp16) +OP(Custom) +OP(DepthToSpace) OP(DepthwiseConv2D) -OP(Pool2D) -OP(Concat) -OP(Fill) -OP(FullyConnected) -OP(Reduce) -OP(Reshape) -OP(Softmax) -OP(Squeeze) -OP(Slice) -OP(StridedSlice) -OP(Transpose) +OP(Einsum) OP(ElementwiseActivation) OP(ElementwiseBinary) OP(ElementwiseUnary) +OP(EmbeddingLookup) OP(ExpandDims) -OP(Comparison) +OP(Fill) +OP(FullyConnected) +OP(FusedBatchNorm) +OP(Gather) +OP(HashtableLookup) +OP(If) +OP(InstanceNorm) +OP(L2Normalization) +OP(LocalResponseNormalization) +OP(LogSoftmax) OP(LSTM) +OP(MatrixBandPart) +OP(OneHot) +OP(Pack) +OP(Pad) +OP(Permute) +OP(Pool2D) +OP(Pow) +OP(PReLU) +OP(Range) +OP(Rank) +OP(Reduce) +OP(Reshape) OP(ResizeBilinear) OP(ResizeNearestNeighbor) OP(Reverse) OP(RNN) +OP(Select) +OP(Shape) +OP(Slice) +OP(Softmax) OP(SpaceToBatchND) OP(SpaceToDepth) -OP(EmbeddingLookup) -OP(L2Normalization) -OP(HashtableLookup) -OP(InstanceNorm) -OP(PReLU) -OP(TransposeConv) -OP(SquaredDifference) -OP(TopKV2) -OP(Gather) -OP(ArgMax) -OP(Einsum) -OP(LocalResponseNormalization) -OP(DepthToSpace) -OP(Pack) -OP(Select) OP(Split) OP(SplitV) +OP(SquaredDifference) +OP(Squeeze) +OP(StatelessRandomUniform) +OP(StridedSlice) +OP(Tile) +OP(TopKV2) +OP(Transpose) +OP(TransposeConv) OP(Unpack) -OP(Pad) -OP(Custom) -OP(Permute) -OP(OneHot) -OP(Shape) -OP(ConvertFp32ToFp16) -OP(ConvertFp16ToFp32) -OP(If) OP(While) -OP(Pow) -OP(Tile) -OP(Range) -OP(Rank) -OP(BCQFullyConnected) -OP(BCQGather) -OP(MatrixBandPart) -OP(BatchMatMul) -OP(FusedBatchNorm) -OP(LogSoftmax) -OP(StatelessRandomUniform) diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h index 7b4c33b76..6cb369447 100644 --- a/runtime/onert/core/include/ir/Subgraphs.h +++ b/runtime/onert/core/include/ir/Subgraphs.h @@ -120,7 +120,7 @@ public: * * @return count of Subgraphs */ - size_t count() { return _subgraphs.size(); } + size_t count() const { return _subgraphs.size(); } /** * @brief Return the primary subgraph diff --git a/runtime/onert/core/include/ir/operation/ArgMax.h b/runtime/onert/core/include/ir/operation/ArgMinMax.h index ea7eabb83..1c9fccd22 100644 --- a/runtime/onert/core/include/ir/operation/ArgMax.h +++ b/runtime/onert/core/include/ir/operation/ArgMinMax.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_IR_OPERATION_ARG_MAX_H__ -#define __ONERT_IR_OPERATION_ARG_MAX_H__ +#ifndef __ONERT_IR_OPERATION_ARG_MIN_MAX_H__ +#define __ONERT_IR_OPERATION_ARG_MIN_MAX_H__ #include "ir/Operation.h" @@ -26,7 +26,7 @@ namespace ir namespace operation { -class ArgMax : public Operation +class ArgMinMax : public Operation { public: enum Input @@ -38,15 +38,16 @@ public: struct Param { DataType output_type; + bool is_arg_max = true; }; public: - ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m); + ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param ¶m); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ArgMax; } + OpCode opcode() const final { return OpCode::ArgMinMax; } public: const Param ¶m() const { return _param; } @@ -59,4 +60,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ARG_MAX_H__ +#endif // __ONERT_IR_OPERATION_ARG_MIN_MAX_H__ diff --git a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h index c40778a56..7d6cb544a 100644 --- a/runtime/onert/core/include/ir/operation/ElementwiseUnary.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ -#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ +#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__ #include "ir/Operation.h" @@ -51,7 +51,7 @@ public: RSQRT, SIN, SQRT, - SQURE, + SQUARE, ZEROS_LIKE }; @@ -80,4 +80,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISE_UNARY_H__ diff --git a/runtime/onert/core/include/ir/operation/Fill.h b/runtime/onert/core/include/ir/operation/Fill.h index 524e41385..b55c77ae5 100644 --- a/runtime/onert/core/include/ir/operation/Fill.h +++ b/runtime/onert/core/include/ir/operation/Fill.h @@ -31,7 +31,7 @@ class Fill : public Operation public: enum Input { - INPUT = 0, + SHAPE = 0, VALUE, }; diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst index 30f211011..5944f8344 100644 --- a/runtime/onert/core/include/util/Config.lst +++ b/runtime/onert/core/include/util/Config.lst @@ -20,7 +20,7 @@ // Name | Type | Default CONFIG(GRAPH_DOT_DUMP , int , "0") -CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq +CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;bcq") // FIXME Remove bcq CONFIG(OP_BACKEND_ALLOPS , std::string , "") CONFIG(OP_BACKEND_MAP , std::string , "") CONFIG(DISABLE_COMPILE , bool , "0") @@ -35,6 +35,7 @@ CONFIG(OP_SEQ_MAX_NODE , int , "0") CONFIG(TRACE_FILEPATH , std::string , "") CONFIG(FP16_ENABLE , bool , "0") CONFIG(RUY_THREADS , int , "-1") +CONFIG(XNNPACK_THREADS , int , "-1") CONFIG(USE_MMAPED_DATA , bool , "0") // Auto-generate all operations diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h index b6a8144fd..da8bc8620 100644 --- a/runtime/onert/core/include/util/ConfigSource.h +++ b/runtime/onert/core/include/util/ConfigSource.h @@ -27,6 +27,7 @@ namespace util { void config_source(std::unique_ptr<IConfigSource> &&source); +void config_source_ext(std::unique_ptr<IConfigSource> &&source); bool toBool(const std::string &val); int 
toInt(const std::string &val);
 
diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h
index 701b835d2..b11da90ce 100644
--- a/runtime/onert/core/include/util/ShapeInference.h
+++ b/runtime/onert/core/include/util/ShapeInference.h
@@ -42,7 +42,7 @@ using Shapes = std::vector<ir::Shape>;
 
 // Define shape calculation for operations. List them in alphabetic order.
 
-ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
+ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank);
 
 ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
                                 const ir::operation::BatchMatMul::Param &param);
@@ -70,7 +70,7 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha
 
 ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis);
 
-ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf);
+template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf);
 
 ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape);
 
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
new file mode 100644
index 000000000..a82704cf0
--- /dev/null
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_TRACING_CTX_H__
+#define __ONERT_UTIL_TRACING_CTX_H__
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/Subgraphs.h"
+
+#include <unordered_map>
+#include <mutex>
+
+namespace onert
+{
+namespace util
+{
+
+/**
+ * @brief Class to maintain information about profiling per session
+ */
+class TracingCtx
+{
+public:
+  /**
+   * @brief Create and store a unique session id managed by this class.
+   *        Note that this constructor can be called by multiple sessions running in parallel.
+   *        Use this constructor only when there is only one subgraph in a model.
+   */
+  TracingCtx(const ir::Graph *primary_subgraph)
+  {
+    decideSessionID();
+    _subgraph_indices.emplace(primary_subgraph, 0);
+  }
+
+  /**
+   * @brief Create and store a unique session id managed by this class.
+   *        Note that this constructor can be called by multiple sessions running in parallel.
+   */
+  TracingCtx(const onert::ir::Subgraphs *subgraphs)
+  {
+    assert(subgraphs);
+
+    decideSessionID();
+
+    auto count = subgraphs->count();
+    for (size_t i = 0; i < count; i++)
+      _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
+  }
+
+  uint32_t getSessionId() const { return _session_id; }
+
+  /**
+   * @brief Set subgraph index of a graph
+   */
+  void setSubgraphIndex(const ir::Graph *g, uint32_t index) { _subgraph_indices.emplace(g, index); }
+
+  /**
+   * @brief Get subgraph index of a graph.
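+   *
+   *        Usage sketch (illustrative only; assumes @c subgraphs is the
+   *        @c std::shared_ptr<ir::Subgraphs> this context was built from):
+   *
+   *          util::TracingCtx ctx{subgraphs.get()};      // assigns a unique session id
+   *          const ir::Graph *g = subgraphs->at(ir::SubgraphIndex{0}).get();
+   *          auto subg_index = ctx.getSubgraphIndex(g);  // -> ir::SubgraphIndex{0}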
+ */ + ir::SubgraphIndex getSubgraphIndex(const ir::Graph *g) const { return _subgraph_indices.at(g); } + +private: + void decideSessionID() + { + std::unique_lock<std::mutex> lock{_session_id_mutex}; + + static uint32_t next_session_id = 0; + _session_id = next_session_id++; + } + +private: + std::unordered_map<const ir::Graph *, ir::SubgraphIndex> _subgraph_indices; + uint32_t _session_id; + static std::mutex _session_id_mutex; +}; + +} // namespace util +} // namespace onert + +#endif // __ONERT_UTIL_TRACING_CTX_H__ diff --git a/runtime/onert/core/include/util/logging.h b/runtime/onert/core/include/util/logging.h index 76cfb8d60..65c375077 100644 --- a/runtime/onert/core/include/util/logging.h +++ b/runtime/onert/core/include/util/logging.h @@ -64,4 +64,11 @@ static Context &ctx = Context::get(); if (::onert::util::logging::ctx.enabled()) \ std::cout << "[" << __func__ << "] " +#define WHEN_LOG_ENABLED(METHOD) \ + if (::onert::util::logging::ctx.enabled()) \ + do \ + { \ + METHOD; \ + } while (0) + #endif // __ONERT_UTIL_LOGGING_H__ diff --git a/runtime/onert/core/src/backend/BackendContext.cc b/runtime/onert/core/src/backend/BackendContext.cc index bafa36d28..404c3b155 100644 --- a/runtime/onert/core/src/backend/BackendContext.cc +++ b/runtime/onert/core/src/backend/BackendContext.cc @@ -17,7 +17,6 @@ #include "backend/BackendContext.h" #include "ir/Operation.h" -#include "backend/IConstantInitializer.h" namespace onert { @@ -31,25 +30,5 @@ void BackendContext::initialize(const std::vector<OperationInfo> &operation_list _operand_list = operand_list; } -void BackendContext::initConsts() -{ - for (auto &op : _operation_list) - { - constant_initializer->setLayout(op.layout); - _graph->operations().at(op.index).accept(*constant_initializer); - } - - for (auto ind : _operand_list) - { - const auto &obj = _graph->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - } - - constant_initializer->run(); -} - } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h index cc8346e6b..3323cf5cb 100644 --- a/runtime/onert/core/src/backend/controlflow/Backend.h +++ b/runtime/onert/core/src/backend/controlflow/Backend.h @@ -72,8 +72,6 @@ public: context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr, context->external_context()); - context->tensor_register = nullptr; - context->optimizer = nullptr; return context; } diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.cc b/runtime/onert/core/src/backend/controlflow/BackendContext.cc new file mode 100644 index 000000000..366377edf --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/BackendContext.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "KernelGenerator.h"
+#include "backend/cpu_common/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+void BackendContext::initConsts()
+{
+  for (auto &op : operation_list())
+  {
+    constant_initializer->setLayout(op.layout);
+    graph()->operations().at(op.index).accept(*constant_initializer);
+  }
+
+  for (auto ind : operand_list())
+  {
+    const auto &obj = graph()->operands().at(ind);
+    if (obj.isConstant() && !constant_initializer->exist(ind))
+    {
+      constant_initializer->registerDefaultInitializer(ind, obj);
+    }
+  }
+
+  constant_initializer->run();
+}
+
+ITensorRegistry *BackendContext::genTensors(const std::vector<onert::ir::OpSequenceIndex> &order,
+                                            const ir::OpSequences &op_seqs,
+                                            const ir::LowerInfoMap &lower_info)
+{
+  auto model_io = (graph()->getInputs() + graph()->getOutputs()) | ir::Remove::UNDEFINED |
+                  ir::Remove::DUPLICATED;
+  for (auto index : operand_list())
+  {
+    if (model_io.contains(index))
+      continue;
+    const auto &obj = graph()->operands().at(index);
+    const auto frontend_layout = [&]() {
+      if (obj.getUses().size() == 0)
+        return ir::Layout::UNKNOWN;
+      auto use_op_ind = *obj.getUses().begin(); // FIXME What if it has two or more uses?
+      for (auto &operation_info : operation_list())
+      {
+        if (operation_info.index == use_op_ind)
+          return operation_info.layout;
+      }
+      return ir::Layout::UNKNOWN;
+    }();
+    const auto &permute_factor = lower_info.operand.at(index)->def_factors().getOnlyElement();
+    if (permute_factor.backend() != backend())
+      continue;
+    const auto backend_layout = permute_factor.layout();
+    ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                 obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+    tensor_builder->registerTensorInfo(index, backend_info, backend_layout);
+  }
+
+  // TODO Get compiler options from the compiler and use them rather than reading them from Env
+  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+  {
+    cpu_common::planTensors(*this, order, op_seqs, lower_info);
+  }
+  else
+  {
+    // For executors that do not have a fixed linear execution order:
+    // as a workaround, use the static memory planner so that tensors are never deallocated
+    for (auto ind : operand_list())
+    {
+      if (tensor_builder->isRegistered(ind))
+        tensor_builder->notifyFirstUse(ind);
+    }
+  }
+
+  tensor_builder->prepare();
+
+  return tensor_registry.get();
+}
+
+FunctionMap BackendContext::genKernels(const std::vector<ir::OpSequenceIndex> &order,
+                                       const ir::OpSequences &op_seqs)
+{
+  FunctionMap ret;
+
+  for (auto op_seq_ind : order)
+  {
+    const auto &op_seq = op_seqs.at(op_seq_ind);
+    bool assigned = [&]() {
+      for (auto op_info : operation_list())
+        if (op_seq.exist(op_info.index))
+          return true;
+      return false;
+    }();
+    if (!assigned)
+      continue;
+    auto fn_seq = kernel_gen->generate(op_seqs.at(op_seq_ind));
+    ret.emplace_back(op_seq_ind, std::move(fn_seq));
+  }
+
+  initConsts();
+
+  // NOTE For memory optimization, we want to free some operand data
+  for (auto ind : operand_list())
+  {
+    // TODO Remove const_cast
+    auto &obj = const_cast<ir::Graph *>(graph())->operands().at(ind);
+    obj.releaseData();
+  }
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+  }
+
+  return ret;
+}
+
+} // namespace controlflow
+} // namespace backend
+} // 
namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/BackendContext.h b/runtime/onert/core/src/backend/controlflow/BackendContext.h index 3647338a0..a768d5d61 100644 --- a/runtime/onert/core/src/backend/controlflow/BackendContext.h +++ b/runtime/onert/core/src/backend/controlflow/BackendContext.h @@ -18,6 +18,9 @@ #define __ONERT_BACKEND_CONTROLFLOW_BACKEND_CONTEXT_H__ #include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "ConstantInitializer.h" +#include "KernelGenerator.h" #include "ExternalContext.h" namespace onert @@ -32,21 +35,36 @@ class BackendContext : public onert::backend::BackendContext public: BackendContext(const Backend *backend, const ir::Graph *graph, std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, - std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, - std::shared_ptr<ITensorRegister> tensor_register = nullptr, - std::shared_ptr<IOptimizer> optimizer = nullptr) - : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder, - constant_initializer, kernel_gen, tensor_register, - optimizer), - _external_context(std::make_shared<ExternalContext>()) + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, graph, tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, _external_context(std::make_shared<ExternalContext>()) { } + ITensorRegistry *genTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, + const ir::LowerInfoMap &lower_info) override; + + FunctionMap genKernels(const std::vector<ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs) override; + std::shared_ptr<ExternalContext> external_context() { return _external_context; } private: + void initConsts(); + void planTensors(const std::vector<onert::ir::OpSequenceIndex> &order, + const ir::OpSequences &op_seqs, const ir::LowerInfoMap &lower_info); + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<ConstantInitializer> constant_initializer; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: // NOTE ruy context has a thread pool, and when multiple ruy contexts are created, // the thread pool is also created in duplicate // TODO Create one ruy context for session diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h index e21a8f357..ac97ef91c 100644 --- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h +++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h @@ -17,10 +17,7 @@ #ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ -#include "TensorRegistry.h" - -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> +#include <backend/cpu_common/ConstantInitializer.h> namespace onert { @@ -29,21 +26,7 @@ namespace backend namespace controlflow { -class ConstantInitializer : public IConstantInitializer -{ -public: - ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, 
_tensor_reg{tensor_reg} - { - } - -private: - std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } - -private: - std::shared_ptr<ITensorRegistry> _tensor_reg; -}; +using ConstantInitializer = cpu_common::ConstantInitializer; } // namespace controlflow } // namespace backend diff --git a/runtime/onert/core/src/backend/controlflow/ExternalContext.h b/runtime/onert/core/src/backend/controlflow/ExternalContext.h index 3db6829a9..cfb983136 100644 --- a/runtime/onert/core/src/backend/controlflow/ExternalContext.h +++ b/runtime/onert/core/src/backend/controlflow/ExternalContext.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__ #define __ONERT_BACKEND_CONTROLFLOW_EXTERNAL_CONTEXT_H__ -#include <backend/IExternalContext.h> #include <util/ConfigSource.h> #include <ruy/context.h> @@ -38,7 +37,7 @@ namespace controlflow { // TODO Unify this with cpu::ExternalContext -class ExternalContext : public IExternalContext +class ExternalContext { public: ExternalContext() : _ruy_context(std::make_unique<ruy::Context>()) diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.cc b/runtime/onert/core/src/backend/controlflow/IOTensor.cc new file mode 100644 index 000000000..47405ac9e --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/IOTensor.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IOTensor.h" + +#include <assert.h> + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +IOTensor::IOTensor(const ir::OperandInfo &info, ir::Layout layout) + : IPortableTensor{info}, _orig_info{info}, _orig_layout{layout} +{ + setUserTensor(nullptr, 0); +} + +void IOTensor::setTensor(IPortableTensor *tensor) +{ + assert(tensor); + assert(tensor != this); + // TODO Handle when layout was changed + assert(tensor->layout() == _orig_layout); // Changing layout is not considered yet + _user_tensor.reset(); + _tensor = tensor; +} + +void IOTensor::setUserTensor(uint8_t *buffer, size_t size) +{ + _user_tensor = std::make_unique<UserTensor>(_orig_info, _orig_layout, buffer, size); + _tensor = _user_tensor.get(); +} + +} // namespace controlflow +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/backend/controlflow/IOTensor.h b/runtime/onert/core/src/backend/controlflow/IOTensor.h new file mode 100644 index 000000000..a7ed84b6d --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/IOTensor.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
+
+#include "backend/IPortableTensor.h"
+#include "UserTensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor object that indirects all accesses to the tensor it points to
+ *
+ * A model I/O tensor can be one of two types:
+ *
+ * 1. @c UserTensor, if it belongs to the primary graph
+ * 2. Any other derivative of @c IPortableTensor from another backend, otherwise
+ *
+ * To support both, this object indirects everything to the actual tensor pointer.
+ * Exceptionally, if it is a UserTensor, this class also creates and manages it.
+ */
+class IOTensor : public IPortableTensor
+{
+public:
+  IOTensor(const ir::OperandInfo &info, ir::Layout layout);
+
+public:
+  void setTensor(IPortableTensor *tensor);
+  void setUserTensor(uint8_t *buffer, size_t size);
+  ir::OperandInfo orig_info() const { return _orig_info; }
+  ir::Layout orig_layout() const { return _orig_layout; }
+
+public:
+  uint8_t *buffer() const override { return _tensor->buffer(); }
+  size_t total_size() const override { return _tensor->total_size(); }
+  size_t dimension(size_t index) const override { return _tensor->dimension(index); }
+  size_t num_dimensions() const override { return _tensor->num_dimensions(); }
+  size_t calcOffset(const ir::Coordinates &coords) const override
+  {
+    return _tensor->calcOffset(coords);
+  }
+  ir::Layout layout() const override { return _tensor->layout(); }
+  ir::DataType data_type() const override { return _tensor->data_type(); }
+  float data_scale() const override { return _tensor->data_scale(); }
+  int32_t data_offset() const override { return _tensor->data_offset(); }
+  bool is_dynamic() const override { return _is_dynamic || (_tensor && _tensor->is_dynamic()); }
+  void set_dynamic() override { _is_dynamic = true; }
+  ir::Shape getShape() const override { return _tensor->getShape(); }
+  void setShape(const ir::Shape &shape) override
+  {
+    // Workaround: IPortableTensor holds _info as its member
+    _info.shape(shape);
+    _tensor->setShape(shape);
+  }
+  bool is_constant() const override { return _tensor->is_constant(); }
+  bool applyShape(const ir::Shape &shape) override
+  {
+    // Workaround: IPortableTensor holds _info as its member
+    _info.shape(shape);
+    return _tensor->applyShape(shape);
+  }
+
+private:
+  const ir::OperandInfo _orig_info;
+  const ir::Layout _orig_layout;
+  bool _is_dynamic{false};
+  IPortableTensor *_tensor{nullptr};        //< The actual tensor that is indirected
+  std::unique_ptr<UserTensor> _user_tensor; //< If it is a user tensor, it is managed by this object
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_IO_TENSOR_H__
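To make the indirection concrete, here is a short sketch of how an IOTensor might be bound in each of the two modes described above (illustrative only, not part of this commit; `info`, `user_buf`, `user_size`, and `backend_tensor` are hypothetical):

  // inside namespace onert
  using backend::controlflow::IOTensor;

  IOTensor io{info, ir::Layout::NHWC}; // info: the model I/O operand's ir::OperandInfo

  // Primary graph: wrap a caller-provided buffer (a UserTensor is created internally)
  io.setUserTensor(user_buf, user_size);

  // Non-primary graph: forward to a tensor owned by another backend
  io.setTensor(backend_tensor); // backend_tensor: an IPortableTensor* from that backend

  io.buffer(); // resolves to whichever tensor is currently bound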
diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
index 8e39ee527..2606f044e 100644
--- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
@@ -31,7 +31,7 @@ namespace backend
 namespace controlflow
 {
 
-KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager,
                                  const std::shared_ptr<TensorRegistry> &tensor_reg,
                                  const std::shared_ptr<ExternalContext> &external_context)
   : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
@@ -77,18 +77,17 @@ void KernelGenerator::visit(const ir::operation::If &node)
   const auto then_subg_index = node.param().then_subg_index;
   const auto else_subg_index = node.param().else_subg_index;
 
-  std::vector<backend::ITensor *> input_tensors;
+  std::vector<backend::IPortableTensor *> input_tensors;
   for (const auto input_index : node.getInputs())
   {
-    auto input_tensor = getTensor(input_index);
-
+    auto input_tensor = getPortableTensor(input_index);
     input_tensors.emplace_back(input_tensor);
   }
 
-  std::vector<backend::ITensor *> output_tensors;
+  std::vector<backend::IPortableTensor *> output_tensors;
   for (const auto output_index : node.getOutputs())
   {
-    auto output_tensor = getTensor(output_index);
+    auto output_tensor = getPortableTensor(output_index);
     output_tensors.emplace_back(output_tensor);
   }
 
@@ -97,8 +96,8 @@ void KernelGenerator::visit(const ir::operation::If &node)
   const auto cond_tensor = input_tensors.front();
   input_tensors.erase(input_tensors.begin());
   auto fn = std::make_unique<::onert::backend::controlflow::kernel::IfLayer>(
-    cond_tensor, input_tensors, output_tensors, node.getOutputs(), _graph, then_subg_index,
-    else_subg_index, _executor_map, _external_context);
+    cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+    _external_context);
 
   _return_fn = std::move(fn);
 }
@@ -124,33 +123,40 @@ void KernelGenerator::visit(const ir::operation::While &node)
 
   // This op does not support input as a constant, because controlflow backend does not have
   // TensorBuilder
-  std::vector<backend::ITensor *> input_tensors;
+  std::vector<backend::IPortableTensor *> input_tensors;
   for (const auto input_index : node.getInputs())
   {
-    auto input_tensor = getTensor(input_index);
-
+    auto input_tensor = getPortableTensor(input_index);
     input_tensors.emplace_back(input_tensor);
   }
 
-  std::vector<backend::ITensor *> output_tensors;
+  std::vector<backend::IPortableTensor *> output_tensors;
   for (const auto output_index : node.getOutputs())
   {
-    auto output_tensor = getTensor(output_index);
+    auto output_tensor = getPortableTensor(output_index);
     output_tensors.emplace_back(output_tensor);
   }
 
   // WhileLayer just sets ExecutorMap instead of cond and body executors to avoid the complexity
   // of creating executors recursively
   auto fn = std::make_unique<::onert::backend::controlflow::kernel::WhileLayer>(
-    input_tensors, output_tensors, node.getOutputs(), _graph, cond_subg_index, body_subg_index,
-    _executor_map, _external_context);
+    input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+    _dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
 
   _return_fn = std::move(fn);
 }
 
 backend::ITensor *KernelGenerator::getTensor(const ir::OperandIndex &index)
 {
-  backend::ITensor *ret = _tensor_registries.getITensor(index);
+  // get Tensor from all tensor registries (for Permute op)
+  auto ret = _tensor_registries.getITensor(index);
+  assert(ret != nullptr);
+  return ret;
+}
+
+backend::IPortableTensor *KernelGenerator::getPortableTensor(const ir::OperandIndex &index)
+{
+  auto ret = _tensor_reg->getPortableTensor(index);
  assert(ret != nullptr);
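  // NOTE A null result here would mean the operand was never registered to this
  //      backend's tensor registry.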
return ret; } diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h index c2c124339..7b395d186 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h @@ -17,13 +17,12 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_GENERATOR_H__ -#include <backend/IKernelGenerator.h> -#include <backend/ITensorBuilder.h> #include <exec/IExecutor.h> #include "ExternalContext.h" #include <ir/Graph.h> #include "TensorBuilder.h" #include "compiler/TensorRegistries.h" +#include "backend/cpu_common/KernelGeneratorBase.h" #include "TensorRegistry.h" namespace onert @@ -33,10 +32,10 @@ namespace backend namespace controlflow { -class KernelGenerator : public IKernelGenerator +class KernelGenerator : public cpu_common::KernelGeneratorBase { public: - KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + KernelGenerator(const ir::Graph &graph, DynamicTensorManager *dyn_tensor_manager, const std::shared_ptr<TensorRegistry> &tensor_reg, const std::shared_ptr<ExternalContext> &external_context); @@ -50,8 +49,6 @@ public: _executor_map = executor_map.get(); } - using IKernelGenerator::visit; - void visit(const ir::OpSequence &) override; void visit(const ir::operation::If &) override; void visit(const ir::operation::Permute &) override; @@ -59,10 +56,11 @@ public: private: backend::ITensor *getTensor(const ir::OperandIndex &index); + backend::IPortableTensor *getPortableTensor(const ir::OperandIndex &index); private: const ir::Graph &_graph; - IDynamicTensorManager *_dyn_tensor_manager; + DynamicTensorManager *_dyn_tensor_manager; std::shared_ptr<TensorRegistry> _tensor_reg; compiler::TensorRegistries _tensor_registries; exec::ExecutorMap *_executor_map; diff --git a/runtime/onert/core/src/backend/controlflow/Tensor.h b/runtime/onert/core/src/backend/controlflow/Tensor.h index ba5bafd75..87951a9b3 100644 --- a/runtime/onert/core/src/backend/controlflow/Tensor.h +++ b/runtime/onert/core/src/backend/controlflow/Tensor.h @@ -27,6 +27,7 @@ namespace controlflow { using Tensor = cpu_common::Tensor; +using ExternalTensor = cpu_common::ExternalTensor; } // namespace controlflow } // namespace backend diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc index e4b0388f9..a767f0eca 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc @@ -30,8 +30,8 @@ namespace controlflow TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())}, - _static_tensor_mgr{new cpu_common::StaticTensorManager( - _tensor_reg->base_reg(), _dynamic_tensor_mgr->dynamic_mem_mgr().get())} + _static_tensor_mgr{ + new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} { /* empty */ } @@ -90,11 +90,7 @@ bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const return _tensor_info_map.find(ind) != _tensor_info_map.end(); } -void TensorBuilder::prepare(void) -{ - _static_tensor_mgr->allocateConsts(); - _static_tensor_mgr->allocateNonconsts(); -} +void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); } void TensorBuilder::allocate() { @@ -102,7 +98,7 @@ void 
TensorBuilder::allocate()
   // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
 }
 
-IDynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
+DynamicTensorManager *TensorBuilder::dynamicTensorManager(void)
 {
   return _dynamic_tensor_mgr.get();
 }
diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
index 695994761..d2e3076fd 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h
@@ -21,7 +21,6 @@
 #include <backend/cpu_common/TensorRegistry.h>
 #include <backend/cpu_common/Tensor.h>
 
-#include <backend/ITensorBuilder.h>
 #include <ir/OperandIndexMap.h>
 
 #include <unordered_map>
@@ -35,7 +34,7 @@ namespace backend
 namespace controlflow
 {
 
-class TensorBuilder : public ITensorBuilder
+class TensorBuilder
 {
 public:
   TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
@@ -47,18 +46,18 @@ public:
    * @param[in] layout Operand data layout
    */
   void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                          ir::Layout backend_layout) override;
+                          ir::Layout backend_layout);
 
-  void notifyFirstUse(const ir::OperandIndex &) override;
-  void notifyLastUse(const ir::OperandIndex &) override;
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
 
-  bool isRegistered(const ir::OperandIndex &) const override;
+  bool isRegistered(const ir::OperandIndex &) const;
 
-  void prepare(void) override;
-  void allocate() override;
-  void postFunctionPrepare() override { /* DO NOTHING */}
+  void prepare(void);
+  void allocate();
+  void postFunctionPrepare() { /* DO NOTHING */}
 
-  IDynamicTensorManager *dynamicTensorManager(void) override;
+  DynamicTensorManager *dynamicTensorManager(void);
 
   /**
    * @brief Get tensor with a specific OperandIndex.
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
index 94f71bb9c..901f0aebb 100644
--- a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
+++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
@@ -20,7 +20,7 @@
 #include "backend/cpu_common/TensorRegistry.h"
 #include "backend/ITensorRegistry.h"
 #include "Tensor.h"
-#include "UserTensor.h"
+#include "IOTensor.h"
 #include <assert.h>
 
 namespace onert
@@ -36,9 +36,10 @@ namespace controlflow
 * This class contains three types of tensors: two kinds of native tensors (tensors that are
 * managed by this backend) and migrant tensors.
* - * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given - * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) - * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg ) + * - NativeIOTensor - @c IOTensor managed by this backend ( in @c _base_reg ) + * - NOTE The tensor it actually points to can be from another backend + * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) + * - MigrantTensor - @c IPortableTensor managed by other backends * * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager * @@ -53,7 +54,7 @@ public: auto base_tensor = _base_reg->getITensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } ITensor *getNativeITensor(const ir::OperandIndex &ind) override @@ -61,7 +62,7 @@ public: auto base_tensor = _base_reg->getNativeITensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } IPortableTensor *getPortableTensor(const ir::OperandIndex &ind) @@ -69,7 +70,7 @@ public: auto base_tensor = _base_reg->getPortableTensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } IPortableTensor *getNativeTensor(const ir::OperandIndex &ind) @@ -77,7 +78,7 @@ public: auto base_tensor = _base_reg->getNativeTensor(ind); if (base_tensor) return base_tensor; - return getNativeUserTensor(ind); + return getNativeIOTensor(ind); } Tensor *getNativeOwnTensor(const ir::OperandIndex &ind) @@ -85,10 +86,10 @@ public: return _base_reg->getNativeTensor(ind); } - UserTensor *getNativeUserTensor(const ir::OperandIndex &ind) + IOTensor *getNativeIOTensor(const ir::OperandIndex &ind) { - auto tensor = _native_user_tensors.find(ind); - if (tensor != _native_user_tensors.end()) + auto tensor = _native_io_tensors.find(ind); + if (tensor != _native_io_tensors.end()) return tensor->second.get(); return nullptr; } @@ -108,22 +109,22 @@ public: _base_reg->setNativeTensor(ind, std::move(tensor)); } - void setNativeUserTensor(ir::OperandIndex ind, std::unique_ptr<UserTensor> &&tensor) + void setNativeIOTensor(ir::OperandIndex ind, std::unique_ptr<IOTensor> &&tensor) { assert(tensor); assert(!getITensor(ind)); // For the ind, tensor is not registered yet - _native_user_tensors[ind] = std::move(tensor); + _native_io_tensors[ind] = std::move(tensor); } - const ir::OperandIndexMap<std::unique_ptr<UserTensor>> &native_user_tensors() + const ir::OperandIndexMap<std::unique_ptr<IOTensor>> &native_io_tensors() { - return _native_user_tensors; + return _native_io_tensors; } std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; } private: std::shared_ptr<cpu_common::TensorRegistry> _base_reg; - ir::OperandIndexMap<std::unique_ptr<UserTensor>> _native_user_tensors; + ir::OperandIndexMap<std::unique_ptr<IOTensor>> _native_io_tensors; }; } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc index de91b850a..1d786c4dd 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.cc @@ -18,7 +18,6 @@ #include <backend/ITensor.h> #include "exec/ExecutorBase.h" -#include <misc/polymorphic_downcast.h> #include "PermuteLayer.h" namespace onert @@ -30,16 +29,15 @@ namespace controlflow 
namespace kernel { -IfLayer::IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, +IfLayer::IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, exec::ExecutorMap *executor_map, const std::shared_ptr<ExternalContext> &external_context) : _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors}, - _output_indices{output_indices}, _graph{graph}, _then_subg_index{then_subg_index}, - _else_subg_index{else_subg_index}, _executor_map{executor_map}, - _external_context{external_context} + _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, + _executor_map{executor_map}, _external_context{external_context} { // At this point, executor_map may not have executors of then subg and else subg } @@ -48,79 +46,34 @@ void IfLayer::run() { // Check condition // // If true - // // // Copy _input_tensors -> then subg's inputs - // // // Run then subg - // // // Copy outputs of then subg -> _output_tensors + // // // Set _input_tensors -> then-subg's inputs + // // // Set outputs of then-subg -> _output_tensors + // // // Run then-subg // // Else - // // // Copy _input_tensors -> else subg's inputs if false - // // // Run else subg - // // // Copy outputs of else subg -> _output_tensors - auto getResultCond = [](backend::ITensor *tensor) -> bool { + // // // Set _input_tensors -> else-subg's inputs + // // // Set outputs of else-subg -> _output_tensors + // // // Run else-subg + + auto getResultCond = [](backend::IPortableTensor *tensor) -> bool { bool ret = false; tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); return ret; }; - exec::ExecutorBase *subg_exec = nullptr; + exec::IExecutor *subg_exec = nullptr; bool cond_result = getResultCond(_cond_tensor); if (cond_result) { VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl; - subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_then_subg_index).get()); + subg_exec = _executor_map->at(_then_subg_index).get(); } else { VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl; - subg_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_else_subg_index).get()); - } - - const auto &subg_graph = subg_exec->graph(); - - std::vector<backend::ITensor *> src_tensors; - std::vector<backend::ITensor *> dst_tensors; - // Add tensors used in subgraph or contained in outputs of subgraph - assert(subg_graph.getInputs().size() == _input_tensors.size()); - assert(subg_graph.getInputs().size() == subg_exec->getInputTensors().size()); - for (uint32_t i = 0; i < subg_graph.getInputs().size(); ++i) - { - const auto &subg_input_index = subg_graph.getInputs().at(i); - const auto &subg_input = subg_graph.operands().at(subg_input_index); - if (subg_input.getUses().size() > 0 || subg_graph.getOutputs().contains(subg_input_index)) - { - src_tensors.emplace_back(_input_tensors.at(i)); - dst_tensors.emplace_back(subg_exec->getInputTensors().at(i)); - } + subg_exec = _executor_map->at(_else_subg_index).get(); } - const auto permute_op_input_to_subg_input = - std::make_shared<PermuteLayer>(src_tensors, dst_tensors, 
_external_context); - - // Add tensors used as output of operation or contained in outputs of operation - src_tensors.clear(); - dst_tensors.clear(); - assert(_output_indices.size() == subg_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - for (uint32_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - src_tensors.emplace_back(subg_exec->getOutputTensors().at(i)); - dst_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_subg_output_to_op_output = - std::make_shared<PermuteLayer>(src_tensors, dst_tensors, _external_context); - - // Remove copying of unused tensor - permute_op_input_to_subg_input->prepare(); - permute_subg_output_to_op_output->prepare(); - // Copy & run - subg_exec->execute(_input_tensors, permute_op_input_to_subg_input); - permute_subg_output_to_op_output->run(); + subg_exec->execute(_input_tensors, _output_tensors); VERBOSE(If) << "Return from $" << (cond_result ? _then_subg_index : _else_subg_index) << std::endl; } diff --git a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h index 9e944bccc..967552fc3 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/IfLayer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_IF_LAYER_H__ -#include <backend/ITensor.h> +#include <backend/IPortableTensor.h> #include <exec/IExecutor.h> #include "../ExternalContext.h" @@ -33,9 +33,9 @@ namespace kernel class IfLayer : public ::onert::exec::IFunction { public: - IfLayer(backend::ITensor *cond_tensor, const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, + IfLayer(backend::IPortableTensor *cond_tensor, + const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index, exec::ExecutorMap *executor_map, const std::shared_ptr<ExternalContext> &external_context); @@ -44,11 +44,9 @@ public: void run() override; private: - backend::ITensor *_cond_tensor; - const std::vector<backend::ITensor *> _input_tensors; - const std::vector<backend::ITensor *> _output_tensors; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; + backend::IPortableTensor *_cond_tensor; + const std::vector<backend::IPortableTensor *> _input_tensors; + const std::vector<backend::IPortableTensor *> _output_tensors; const ir::SubgraphIndex _then_subg_index; const ir::SubgraphIndex _else_subg_index; exec::ExecutorMap *_executor_map; diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h index 5d0f1918e..6fb69b65c 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.h @@ -17,7 +17,6 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_PERMUTELAYER_H__ -#include "backend/ITensorBuilder.h" #include "exec/IPermuteFunction.h" #include 
"exec/IExecutor.h" #include "../ExternalContext.h" diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc index a0d478603..a4b5aa5ca 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.cc @@ -16,6 +16,7 @@ #include "WhileLayer.h" +#include <algorithm> #include <backend/ITensor.h> #include "exec/ExecutorBase.h" #include <misc/polymorphic_downcast.h> @@ -30,16 +31,15 @@ namespace controlflow namespace kernel { -WhileLayer::WhileLayer(const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, +WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map, + cpu_common::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr<ExternalContext> &external_context) : _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index}, - _output_indices{output_indices}, _graph{graph}, _input_tensors{input_tensors}, - _output_tensors{output_tensors}, _executor_map{executor_map}, - _external_context{external_context} + _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map}, + _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context} { // At this point, executor_map may not have executors of cond subg and body subg } @@ -56,164 +56,90 @@ void WhileLayer::run() // // Run cond subg // If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" -> // "_dst_tensors" - auto cond_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_cond_subg_index).get()); - auto body_exec = nnfw::misc::polymorphic_downcast<exec::ExecutorBase *>( - _executor_map->at(_body_subg_index).get()); - - const auto &cond_graph = cond_exec->graph(); - const auto &body_graph = body_exec->graph(); - - std::vector<backend::ITensor *> input_tensors; - std::vector<backend::ITensor *> cond_input_tensors; - std::vector<backend::ITensor *> body_input_tensors; - std::vector<backend::ITensor *> body_output_tensors; - std::vector<backend::ITensor *> output_tensors; - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == _input_tensors.size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - input_tensors.emplace_back(_input_tensors.at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_op_input_to_cond_input = - std::make_shared<PermuteLayer>(input_tensors, cond_input_tensors, _external_context); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == _input_tensors.size()); - assert(_output_indices.size() == _output_tensors.size()); - input_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() 
> 0 || _graph.getOutputs().contains(output_index)) - { - input_tensors.emplace_back(_input_tensors.at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_op_input_to_op_output = - std::make_shared<PermuteLayer>(input_tensors, output_tensors, _external_context); - - // Add all tensors with unused tensors in body subgraph because unused input tensors will be - // copied output tensors in body subgraph - assert(_input_tensors.size() == body_exec->getInputTensors().size()); - input_tensors = _input_tensors; - body_input_tensors = body_exec->getInputTensors(); - const auto permute_op_input_to_body_input = - std::make_shared<PermuteLayer>(input_tensors, body_input_tensors, _external_context); - - // Add only used tensors in cond subgraph - assert(cond_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(cond_graph.getInputs().size() == cond_exec->getInputTensors().size()); - body_output_tensors.clear(); - cond_input_tensors.clear(); - for (uint32_t i = 0; i < cond_graph.getInputs().size(); ++i) - { - const auto &cond_input = cond_graph.operands().at(cond_graph.getInputs().at(i)); - if (cond_input.getUses().size() > 0) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - cond_input_tensors.emplace_back(cond_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_cond_input = - std::make_shared<PermuteLayer>(body_output_tensors, cond_input_tensors, _external_context); - - // Add only used tensors in body subgraph - assert(body_graph.getInputs().size() == body_exec->getOutputTensors().size()); - assert(body_graph.getInputs().size() == body_exec->getInputTensors().size()); - body_output_tensors.clear(); - body_input_tensors.clear(); - for (uint32_t i = 0; i < body_graph.getInputs().size(); ++i) - { - const auto &body_input_index = body_graph.getInputs().at(i); - const auto &body_input = body_graph.operands().at(body_input_index); - if (body_input.getUses().size() > 0 && - !body_exec->graph().getOutputs().contains(body_input_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - body_input_tensors.emplace_back(body_exec->getInputTensors().at(i)); - } - } - const auto permute_body_output_to_body_input = - std::make_shared<PermuteLayer>(body_output_tensors, body_input_tensors, _external_context); - - // Add only used tensors among outputs of while operation - assert(_output_indices.size() == body_exec->getOutputTensors().size()); - assert(_output_indices.size() == _output_tensors.size()); - body_output_tensors.clear(); - output_tensors.clear(); - for (size_t i = 0; i < _output_indices.size(); ++i) - { - const auto &output_index = _output_indices.at(i); - const auto &output = _graph.operands().at(output_index); - if (output.getUses().size() > 0 || _graph.getOutputs().contains(output_index)) - { - body_output_tensors.emplace_back(body_exec->getOutputTensors().at(i)); - output_tensors.emplace_back(_output_tensors.at(i)); - } - } - const auto permute_body_output_to_op_output = - std::make_shared<PermuteLayer>(body_output_tensors, output_tensors, _external_context); + auto cond_exec = _executor_map->at(_cond_subg_index).get(); + auto body_exec = _executor_map->at(_body_subg_index).get(); - // Remove copying of unused tensor - permute_op_input_to_cond_input->prepare(); - permute_op_input_to_op_output->prepare(); - permute_op_input_to_body_input->prepare(); - permute_body_output_to_cond_input->prepare(); - permute_body_output_to_body_input->prepare(); - 
permute_body_output_to_op_output->prepare(); + // Need a temp tensor to hold the cond subgraph output + assert(cond_exec->getOutputTensors().size() == 1); + auto cond_output_tensor = [&]() { + auto cond_output = cond_exec->getOutputTensors().at(0); + auto tensor = std::make_unique<Tensor>(cond_output->orig_info(), cond_output->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + return tensor; + }(); VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; - cond_exec->execute(_input_tensors, permute_op_input_to_cond_input); + cond_exec->execute(_input_tensors, {cond_output_tensor.get()}); VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; - assert(cond_exec->getOutputTensors().size() == 1); - auto &cond_output_tensor = cond_exec->getOutputTensors().at(0); auto getResultCond = [](backend::ITensor *tensor) -> bool { bool ret = false; tensor->access([&](ITensor &tensor) { ret = *reinterpret_cast<bool *>(tensor.buffer()); }); return ret; }; + std::vector<ITensor *> op_inputs(_input_tensors.begin(), _input_tensors.end()); + std::vector<ITensor *> op_outputs(_output_tensors.begin(), _output_tensors.end()); + // Copying body inputs to outputs when the loop body is never executed + if (!getResultCond(cond_output_tensor.get())) + { + PermuteLayer copy_body_inputs_to_op_outputs{op_inputs, op_outputs, _external_context}; + copy_body_inputs_to_op_outputs.run(); + return; + } + + // Need some temp tensors to hold the body subgraph output + std::vector<std::unique_ptr<Tensor>> temp_outputs_o; + std::vector<IPortableTensor *> temp_outputs; + for (auto io_tensor : body_exec->getOutputTensors()) + { + auto tensor = std::make_unique<Tensor>(io_tensor->orig_info(), io_tensor->orig_layout(), + _dyn_memory_manager); + tensor->set_dynamic(); + tensor->setBuffer(_dyn_memory_manager->allocate(tensor.get(), tensor->total_size())); + temp_outputs.push_back(tensor.get()); + temp_outputs_o.push_back(std::move(tensor)); + } + + std::vector<ITensor *> body_outputs(temp_outputs.begin(), temp_outputs.end()); + PermuteLayer copy_body_outputs_to_op_outputs{body_outputs, op_outputs, _external_context}; + const auto body_execute_with_op_inputs = [&]() { VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; - body_exec->execute(_input_tensors, permute_op_input_to_body_input); + body_exec->execute(_input_tensors, temp_outputs); VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; }; const auto body_execute_with_body_outputs = [&]() { VERBOSE(While) << "Call to $" << _body_subg_index << " (body)" << std::endl; - body_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_body_input); + body_exec->execute(_output_tensors, temp_outputs); VERBOSE(While) << "Return from $" << _body_subg_index << std::endl; }; std::function<void()> body_execute = body_execute_with_op_inputs; const auto cond_execute = [&]() { VERBOSE(While) << "Call to $" << _cond_subg_index << " (cond)" << std::endl; - cond_exec->execute(body_exec->getOutputTensors(), permute_body_output_to_cond_input); + cond_exec->execute(_output_tensors, {cond_output_tensor.get()}); VERBOSE(While) << "Return from $" << _cond_subg_index << std::endl; }; - auto permute_to_outputs_fn = permute_op_input_to_op_output; // Loop while Cond subgraph's output is true - while (getResultCond(cond_output_tensor)) + while (getResultCond(cond_output_tensor.get())) { body_execute(); + 
copy_body_outputs_to_op_outputs.run(); cond_execute(); body_execute = body_execute_with_body_outputs; - permute_to_outputs_fn = permute_body_output_to_op_output; } - permute_to_outputs_fn->run(); + + // Clean-up the temp tensors + _dyn_memory_manager->deallocate(cond_output_tensor.get()); + for (auto tensor : temp_outputs) + { + _dyn_memory_manager->deallocate(tensor); + } } } // namespace kernel diff --git a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h index 8f82bd973..d3924c843 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h +++ b/runtime/onert/core/src/backend/controlflow/kernel/WhileLayer.h @@ -17,13 +17,15 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ #define __ONERT_BACKEND_CONTROLFLOW_KERNEL_WHILE_LAYER_H__ -#include <backend/ITensor.h> +#include <backend/IPortableTensor.h> #include <exec/IExecutor.h> #include <exec/IFunction.h> #include <ir/OperandIndexSequence.h> #include <ir/Graph.h> #include "../ExternalContext.h" +#include "backend/cpu_common/MemoryManager.h" + namespace onert { namespace backend @@ -36,11 +38,10 @@ namespace kernel class WhileLayer : public ::onert::exec::IFunction { public: - WhileLayer(const std::vector<backend::ITensor *> input_tensors, - const std::vector<backend::ITensor *> output_tensors, - const ir::OperandIndexSequence &output_indices, const ir::Graph &graph, + WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors, + const std::vector<backend::IPortableTensor *> output_tensors, const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index, - exec::ExecutorMap *executor_map, + exec::ExecutorMap *executor_map, cpu_common::DynamicMemoryManager *dyn_memory_manager, const std::shared_ptr<ExternalContext> &external_context); public: @@ -49,11 +50,10 @@ public: private: const ir::SubgraphIndex _cond_subg_index; const ir::SubgraphIndex _body_subg_index; - const ir::OperandIndexSequence &_output_indices; - const ir::Graph &_graph; - const std::vector<backend::ITensor *> _input_tensors; - const std::vector<backend::ITensor *> _output_tensors; + const std::vector<backend::IPortableTensor *> _input_tensors; + const std::vector<backend::IPortableTensor *> _output_tensors; exec::ExecutorMap *_executor_map; + cpu_common::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors const std::shared_ptr<ExternalContext> _external_context; }; diff --git a/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc new file mode 100644 index 000000000..732b03ce8 --- /dev/null +++ b/runtime/onert/core/src/backend/cpu_common/BackendContextHelpers.cc @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/cpu_common/BackendContextHelpers.h" diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc index 6f6eb77bc..610ba5ffc 100644 --- a/runtime/onert/backend/cpu/ConstantInitializer.cc +++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializer.cc @@ -14,19 +14,19 @@ * limitations under the License. */ -#include "ConstantInitializer.h" -#include "Tensor.h" +#include "backend/cpu_common/ConstantInitializer.h" +#include "backend/cpu_common/Tensor.h" namespace onert { namespace backend { -namespace cpu +namespace cpu_common { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, const std::shared_ptr<ITensorRegistry> &tensor_reg) - : IConstantInitializer{operands}, _tensor_reg{tensor_reg} + : ConstantInitializerBase{operands}, _tensor_reg{tensor_reg} { // DO NOTHING } @@ -53,42 +53,6 @@ void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &in }; } -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerExternalInitializer(kernel_index, kernel_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS); - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT); - const auto &weight_obj = _operands.at(weight_index); - registerExternalInitializer(weight_index, weight_obj); - - const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS); - if (!bias_index.undefined()) - { - const auto &bias_obj = _operands.at(bias_index); - registerExternalInitializer(bias_index, bias_obj); - } -} - -} // namespace cpu +} // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/IConstantInitializer.cc b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc index 6fb9757e0..15c2dfeb1 100644 --- a/runtime/onert/core/src/backend/IConstantInitializer.cc +++ b/runtime/onert/core/src/backend/cpu_common/ConstantInitializerBase.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "backend/IConstantInitializer.h" +#include "backend/cpu_common/ConstantInitializerBase.h" #include <Half.h> @@ -24,9 +24,11 @@ namespace onert { namespace backend { +namespace cpu_common +{ -void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) +void ConstantInitializerBase::registerCopyInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) { // For only CONSTANTS // TODO Add to check if tensor has been allocated @@ -67,8 +69,8 @@ void IConstantInitializer::registerCopyInitializer(const ir::OperandIndex &index } } -void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &index, - const ir::Operand &obj) +void ConstantInitializerBase::registerPermuteInitializer(const ir::OperandIndex &index, + const ir::Operand &obj) { // For only CONSTANTS // TODO Add to check if tensor has been allocated @@ -82,27 +84,27 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in switch (type) { case DataType::FLOAT32: - _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout); break; case DataType::INT32: - _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout); break; case DataType::UINT32: - _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<uint32_t>, _1, _2, _current_layout); break; case DataType::BOOL8: case DataType::QUANT_UINT8_ASYMM: - _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<uint8_t>, _1, _2, _current_layout); break; case DataType::QUANT_INT8_SYMM: case DataType::QUANT_INT8_ASYMM: - _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<int8_t>, _1, _2, _current_layout); break; case DataType::FLOAT16: - _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<float16>, _1, _2, _current_layout); break; case DataType::INT64: - _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_op_seq_layout); + _init_map[index] = std::bind(permuteInit<int64_t>, _1, _2, _current_layout); break; default: throw std::runtime_error("Not supported, yet"); @@ -110,5 +112,6 @@ void IConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &in } } +} // namespace cpu_common } // namespace backend } // namespace onert diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc index cac43babe..8c5c46a08 100644 --- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc @@ -17,6 +17,7 @@ #include "backend/cpu_common/StaticTensorManager.h" #include "backend/cpu_common/DynamicTensorManager.h" +#include "backend/cpu_common/Tensor.h" #include <util/logging.h> namespace onert @@ -27,31 +28,13 @@ namespace cpu_common { StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> ®, - DynamicMemoryManager *dynamic_mem_mgr) - : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}, - _dynamic_mem_mgr{dynamic_mem_mgr} + DynamicTensorManager 
*dynamic_tensor_manager) + : _nonconst_mgr{new MemoryManager()}, _tensors{reg}, + _dynamic_tensor_manager{dynamic_tensor_manager} { // DO NOTHING } -void StaticTensorManager::allocateConsts(void) -{ - for (auto &pair : _tensors->native_tensors()) - { - const auto &ind = pair.first; - auto tensor = pair.second.get(); - if (_as_constants[ind]) - { - auto mem_alloc = _const_mgr->allocate(_tensors->getITensor(ind), tensor->total_size()); - tensor->setBuffer(mem_alloc); - auto buffer = mem_alloc->base(); - VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) - << "size : " << tensor->total_size() << std::endl; - } - } -} - void StaticTensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); @@ -65,14 +48,12 @@ void StaticTensorManager::allocateNonconsts(void) auto *buffer = _nonconst_mgr->getBuffer(ind); tensor->setBuffer(buffer); - VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value() - << "): " << static_cast<void *>(buffer) << std::endl; + VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value() + << "): " << static_cast<void *>(buffer) << std::endl; } } } -void StaticTensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } - void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, @@ -80,8 +61,17 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, _dynamic_mem_mgr); - _tensors->setNativeTensor(ind, std::move(tensor)); + if (as_const) + { + auto tensor = std::make_unique<ExternalTensor>(tensor_info, backend_layout); + _tensors->setNativeTensor(ind, std::move(tensor)); + } + else + { + auto tensor = std::make_unique<Tensor>(tensor_info, backend_layout, + _dynamic_tensor_manager->dynamic_mem_mgr().get()); + _tensors->setNativeTensor(ind, std::move(tensor)); + } _as_constants[ind] = as_const; } diff --git a/runtime/onert/core/src/backend/cpu_common/Tensor.cc b/runtime/onert/core/src/backend/cpu_common/Tensor.cc index d3dcf9a6d..e412cb775 100644 --- a/runtime/onert/core/src/backend/cpu_common/Tensor.cc +++ b/runtime/onert/core/src/backend/cpu_common/Tensor.cc @@ -95,3 +95,20 @@ bool Tensor::applyShape(const ir::Shape &new_shape) } // namespace cpu_common } // namespace backend } // namespace onert + +// ExternalTensor + +namespace onert +{ +namespace backend +{ +namespace cpu_common +{ + +// `dynamic_cast` does not work across shared library boundaries on the NDK +// With this as a key function, `dynamic_cast` works across dynamic libraries +ExternalTensor::~ExternalTensor() {} + +} // namespace cpu_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc index 0093f50fd..ea45cbeb7 100644 --- a/runtime/onert/core/src/compiler/BackendManager.cc +++ b/runtime/onert/core/src/compiler/BackendManager.cc @@ -69,55 +69,73 @@ void BackendManager::loadBackend(const std::string &backend) return; } - // TODO Remove indentation + const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; + void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + + if (handle == nullptr) { - const std::string backend_so = "libbackend_" + backend + SHARED_LIB_EXT; - void *handle = dlopen(backend_so.c_str(), RTLD_LAZY | RTLD_LOCAL); + VERBOSE(BackendManager) << "Failed to load backend 
'" << backend << "' - " << dlerror() << "\n"; + return; + } - if (handle == nullptr) + VERBOSE(BackendManager) << "Successfully loaded '" << backend << "'(" << backend_so << ")\n"; + + { + // load object creator function + auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); + if (backend_create == nullptr) { - VERBOSE_F() << "Failed to load backend '" << backend << "' - " << dlerror() << std::endl; + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find function `onert_backend_create` : %s\n", + dlerror()); + dlclose(handle); return; } - VERBOSE_F() << "Successfully loaded '" << backend << "' - " << backend_so << "\n"; - + // load object creator function + auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); + if (backend_destroy == nullptr) { - // load object creator function - auto backend_create = (backend_create_t)dlsym(handle, "onert_backend_create"); - if (backend_create == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_create : %s\n", - dlerror()); - abort(); - } - - // load object creator function - auto backend_destroy = (backend_destroy_t)dlsym(handle, "onert_backend_destroy"); - if (backend_destroy == nullptr) - { - fprintf(stderr, "BackendManager: unable to open function onert_backend_destroy : %s\n", - dlerror()); - abort(); - } - - auto backend_object = - std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); - bool initialized = backend_object->config()->initialize(); // Call initialize here? - if (!initialized) - { - VERBOSE_F() << backend.c_str() << " backend initialization failed. Don't use this backend" - << std::endl; - dlclose(handle); - return; - } - _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); + // TODO replace `fprintf` with `VERBOSE` + fprintf(stderr, "BackendManager: unable to find `function onert_backend_destroy` : %s\n", + dlerror()); + dlclose(handle); + return; } - // Save backend handle (avoid warning by handle lost without dlclose()) - auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{handle, [](void *h) { dlclose(h); }}; - _handle_map.emplace(backend, std::move(u_handle)); + auto backend_object = + std::unique_ptr<backend::Backend, backend_destroy_t>(backend_create(), backend_destroy); + bool initialized = backend_object->config()->initialize(); // Call initialize here? + if (!initialized) + { + VERBOSE(BackendManager) << backend.c_str() + << " backend initialization failed. 
Don't use this backend" + << std::endl; + dlclose(handle); + return; + } + _gen_map.emplace(backend_object->config()->id(), std::move(backend_object)); } + + // Save backend handle (avoid a warning about the handle being lost without dlclose()) + + // NOTE This is a workaround for clang-format3.9 (seems like it does not understand + // "by-copy capture with an initializer") + // clang-format off + auto u_handle = std::unique_ptr<void, dlhandle_destroy_t>{ + handle, [id = backend, filename = backend_so](void *h) { + if (dlclose(h) == 0) + { + VERBOSE(BackendManager) << "Successfully unloaded '" << id << "'(" << filename << ")\n"; + } + else + { + VERBOSE(BackendManager) + << "Failed to unload backend '" << id << "' - " << dlerror() << "\n"; + } + }}; +// clang-format on +_handle_map.emplace(backend, std::move(u_handle)); } backend::Backend *BackendManager::get(const std::string &key) diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index c2844bd7c..7eeb14ad3 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -41,6 +41,30 @@ #include "ir/OperationDumper.h" #include "misc/string_helpers.h" +namespace +{ + +using namespace onert; + +std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend) +{ + std::unordered_map<ir::OpCode, std::string>::iterator it; + std::string opbackends; + + for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it) + { + if (!opbackends.empty()) + opbackends = opbackends + ", "; + + auto opcode = it->first; + const std::string opname = ir::toString(opcode); + opbackends += opname + "=" + it->second; + } + return opbackends; +} + +} // namespace + namespace onert { @@ -51,7 +75,6 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) { CompilerOptions options; options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';'); - options.is_primary_subgraph = false; options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH); options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP); options.op_seq_max_node = util::getConfigInt(util::config::OP_SEQ_MAX_NODE); @@ -108,13 +131,15 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs) return options; } -Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs) +Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx) : _subgraphs{subgs}, _state{State::CREATED} { // Set default values for CompilerOptions // All these default values should not be fetched from Env, when we stop supporting Android NN // API. 
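// A minimal standalone sketch of the BACKENDS split used by
// fetchCompilerOptionsFromGlobalConfig above (e.g. BACKENDS="cpu;acl_cl" becomes
// backend_list), assuming nnfw::misc::split behaves like a plain
// single-character delimiter split; names here are illustrative only:
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> splitConfig(const std::string &s, char delim)
{
  std::vector<std::string> tokens; // "cpu;acl_cl" -> {"cpu", "acl_cl"}
  std::stringstream ss{s};
  std::string token;
  while (std::getline(ss, token, delim))
  {
    if (!token.empty())
      tokens.push_back(token);
  }
  return tokens;
}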
_options = fetchCompilerOptionsFromGlobalConfig(*subgs); + + _options.tracing_ctx = tracing_ctx; } void Compiler::enableToFp16() { _options.fp16_enable = true; } @@ -132,12 +157,10 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) { // Set control flow backend for control flow operators { - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = - backend::controlflow::Config::ID; - _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = - backend::controlflow::Config::ID; + auto &cfid = backend::controlflow::Config::ID; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = cfid; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = cfid; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = cfid; } // FIXME This is a workaround for bcq operations, should remove it @@ -157,7 +180,11 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) VERBOSE(Compiler) << "graph_dump_level : " << _options.graph_dump_level << std::endl; VERBOSE(Compiler) << "op_seq_max_node : " << _options.op_seq_max_node << std::endl; VERBOSE(Compiler) << "executor : " << _options.executor << std::endl; - VERBOSE(Compiler) << "manual_scheduler_options : (Too many things to print)" << std::endl; + VERBOSE(Compiler) << "manual backend_for_all : " + << _options.manual_scheduler_options.backend_for_all << std::endl; + VERBOSE(Compiler) << "manual_scheduler_options : " + << getOpBackends(_options.manual_scheduler_options.opcode_to_backend) + << std::endl; VERBOSE(Compiler) << "he_scheduler : " << _options.he_scheduler << std::endl; VERBOSE(Compiler) << "he_profiling_mode : " << _options.he_profiling_mode << std::endl; VERBOSE(Compiler) << "disable_compile : " << _options.disable_compile << std::endl; @@ -202,7 +229,6 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) // Lower: Assign backend std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs; _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { - _options.is_primary_subgraph = (index == ir::SubgraphIndex{0}); onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); @@ -230,6 +256,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) _subgraphs.reset(); + for (auto &pair : lowered_subgs) + { + const auto &subg_index = pair.first; + auto &lowered_subg = pair.second; + onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); + dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); + } + // Shape inference. 
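// A toy illustration of what the shape inference step below computes for one
// operation kind; onert's real inferencer covers every operation and walks the
// graph in topological order (sketch only, not the actual inferencer API):
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

using Shape = std::vector<int32_t>;

// Infer the output shape of a rank-matched binary elementwise operation with
// NumPy-style broadcasting on size-1 dimensions.
Shape inferEltwiseShape(const Shape &lhs, const Shape &rhs)
{
  assert(lhs.size() == rhs.size());
  Shape out(lhs.size());
  for (std::size_t i = 0; i < lhs.size(); ++i)
  {
    assert(lhs[i] == rhs[i] || lhs[i] == 1 || rhs[i] == 1);
    out[i] = std::max(lhs[i], rhs[i]);
  }
  return out;
}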
{ const auto primary_subg_idx = ir::SubgraphIndex{0}; @@ -266,12 +300,8 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) auto &lowered_subg = pair.second; auto indexed_ranks = lowered_subg->indexed_ranks(); - _options.is_primary_subgraph = (subg_index == ir::SubgraphIndex{0}); - - onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level); - dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value())); - - ir::OperationDumper dumper("START SUBGRAPH " + std::to_string(subg_index.value())); + ir::OperationDumper dumper("Executor generation of Subgraph " + + std::to_string(subg_index.value())); lowered_subg->graph().operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); }); auto executor = std::unique_ptr<exec::IExecutor>{ diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index bb325ffbc..356feed7c 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -16,6 +16,7 @@ #include "ExecutorFactory.h" +#include <deque> #include <functional> #include "exec/ExecutionObservers.h" #include "exec/LinearExecutor.h" @@ -25,16 +26,13 @@ #include "compiler/ExecutionBuilder.h" #include "exec/ExecTime.h" #include "compiler/Linear.h" -#include "compiler/TensorBuilders.h" -#include "backend/IConstantInitializer.h" -#include "backend/IKernelGenerator.h" -#include "backend/IOptimizer.h" #include "backend/IPortableTensor.h" -#include "backend/ITensorRegister.h" #include "backend/controlflow/Config.h" #include "backend/controlflow/KernelGenerator.h" #include "backend/controlflow/UserTensor.h" #include "backend/controlflow/TensorBuilder.h" +#include "util/TracingCtx.h" + #include <memory> namespace onert @@ -66,6 +64,36 @@ private: std::shared_ptr<backend::IConfig> _config; }; +void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph, + const ir::OperandIndexSequence &indices) +{ + // TODO Store controlflow backend in BackendContext + std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; + for (const auto &e : lowered_graph.backend_contexts()) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::controlflow::Config::ID) + { + cf_tensor_reg = + std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); + } + } + assert(cf_tensor_reg); + + for (auto ind : indices) + { + const auto &operand = lowered_graph.graph().operands().at(ind); + auto tensor = std::make_unique<backend::controlflow::IOTensor>( + operand.info(), + ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */ + ); + + // Add tensor to controlflow TensorRegistry. 
+ cf_tensor_reg->setNativeIOTensor(ind, std::move(tensor)); + } +} + } // namespace } // namespace onert @@ -134,97 +162,6 @@ void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_g } } -void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) -{ - for (const auto index : order) - { - const auto &op_seq = lowered_graph->op_seqs().at(index); - const auto backend = lowered_graph->getLowerInfo(index)->backend(); - const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register; - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs(); - - if (tensor_register) - { - // Custom registration - tensor_register->registerTensors(op_seq, lowered_graph->getLowerInfo()); - } - else - { - // Default registration - for (const auto op_idx : op_seq) - { - const auto &op = lowered_graph->graph().operations().at(op_idx); - for (const auto &index : - (op.getInputs() | ir::Remove::UNDEFINED) + (op.getOutputs() | ir::Remove::UNDEFINED)) - { - if (!tensor_builder->isRegistered(index) && !model_io.contains(index)) - { - const auto &operand_lower_info = - lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement(); - - // E.g., permute (CPU) -> tensor A -> MaxPool2D(acl_cl) - // op.getOutputs() of permute (CPU) returns tensor A - // but tensor A belongs to the backend of acl_cl. - // So, we have to make this tensor NOT registered for CPU. - if (operand_lower_info.backend() != backend) - continue; - - const auto &obj = lowered_graph->graph().operands().at(index); - const auto frontend_layout = op_seq.getLayout(); - const auto backend_layout = operand_lower_info.layout(); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), - obj.isConstant()}; - tensor_builder->registerTensorInfo(index, backend_info, backend_layout); - } - } - } - } - } -} - -std::vector<backend::ITensor *> -ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices) -{ - std::vector<backend::ITensor *> ret; - - // TODO Store controlflow backend in BackendContext - std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder; - std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; - for (const auto &e : lowered_graph.backend_contexts()) - { - auto backend = e.first; - auto &context = e.second; - if (backend->config()->id() == backend::controlflow::Config::ID) - { - cf_tensor_builder = - std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder); - cf_tensor_reg = - std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); - } - } - assert(cf_tensor_builder); - assert(cf_tensor_reg); - - for (auto ind : indices) - { - const auto &operand = lowered_graph.graph().operands().at(ind); - auto tensor = std::make_unique<backend::controlflow::UserTensor>( - operand.info(), - ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */ - ); - - // Add tensor to controlflow TensorRegistry. 
- cf_tensor_reg->setNativeUserTensor(ind, std::move(tensor)); - auto *itensor = cf_tensor_reg->getITensor(ind); - ret.push_back(itensor); - } - return ret; -} - void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph) { TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true}; @@ -260,110 +197,78 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo initializeBackendContext(lowered_graph.get()); - // linearize - assert(!lowered_graph->graph().isBuildingPhase()); - - /************************************************* - * Backend dependent analysis & optimization phase - *************************************************/ - - for (auto &pair : backend_contexts) - { - auto &optimizer = pair.second->optimizer; - if (optimizer) - optimizer->optimize(); - } + TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - /********************************************************** - * Backend dependent analysis & optimization phase finished - **********************************************************/ + assert(!lowered_graph->graph().isBuildingPhase()); - /*********************** - * Code generation phase - ***********************/ + initializeSubgraphIOTensors( + *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); + // linearize auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); - - std::vector<backend::ITensor *> input_tensors; - std::vector<backend::ITensor *> output_tensors; - if (options.is_primary_subgraph) - { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); - } - Linear::dump(*lowered_graph, order); - Linear::planTensors(*lowered_graph, order); - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; - TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - - for (auto &tensor_builder : tensor_builders) + for (auto &pair : backend_contexts) { - tensor_builder->prepare(); + pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo()); } prepareMigrantTensors(*lowered_graph); - ExecutionBuilder builder; - - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + // Give some runtime objects to controlflow KernelGenerator + for (auto &pair : backend_contexts) + { + auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get()); + if (cf_context != nullptr) { + auto cf_kernel_gen = cf_context->kernel_gen; cf_kernel_gen->setTensorRegistries(tensor_regs); cf_kernel_gen->setExecutorMap(executor_map); } - auto fn_seq = kernel_gen->generate(op_seq); - if (options.he_profiling_mode) - { - fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); - } - builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)}); - }); - - for (auto &tensor_builder : tensor_builders) - { - tensor_builder->allocate(); } + 
ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; for (auto &pair : backend_contexts) { - pair.second->initConsts(); + // NOTE The controlflow backend must be processed last. + // This is because the Permute layer is special: it is the only operation that could have + // different ITensor objects for its input and output. And it requires all other backends' + // tensors to be ready to use. + if (pair.first->config()->id() == "controlflow") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - lowered_graph->graph().operands().iterate( - [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - auto code_map = builder.releaseCodeMap(); - - for (auto &it : code_map) + // Generate kernels + for (auto &pair : ordered_contexts) { - auto op_seq_index = it.first; - auto &fn_seq = it.second.fn_seq; - - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend(); - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - tensor_builder->postFunctionPrepare(); - }); + auto codes = pair.second->genKernels(order, lowered_graph->op_seqs()); + for (auto &pair : codes) + { + auto &op_seq_ind = pair.first; + auto &fn_seq = pair.second; + auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind); + auto lower_info = lowered_graph->getLowerInfo(op_seq_ind); + if (options.he_profiling_mode) + fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); + builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)}); + } } - auto exec = - new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(code_map), order}; + auto code_map = builder.releaseCodeMap(); + + auto exec = new exec::LinearExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), + order, options.tracing_ctx}; if (!options.trace_filepath.empty()) { - std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph()); + std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>( + options.trace_filepath, exec->graph(), options.tracing_ctx); exec->addObserver(std::move(ctp)); } @@ -378,100 +283,81 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( initializeBackendContext(lowered_graph.get()); - auto order = Linear::linearize(*lowered_graph); - runTensorRegistration(lowered_graph.get(), order); - - std::vector<backend::ITensor *> input_tensors; - std::vector<backend::ITensor *> output_tensors; - if (options.is_primary_subgraph) - { - input_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getInputs()); - output_tensors = initializeModelIOTensors(*lowered_graph, lowered_graph->graph().getOutputs()); - } - - TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true}; TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true}; - // To make tensors never be deallocated, this is a workaround to use static memory planner - for (auto &tensor_builder : tensor_builders) - { - lowered_graph->graph().operands().iterate( - [&](const ir::OperandIndex &ind, const ir::Operand &) { - if (tensor_builder->isRegistered(ind)) - { - tensor_builder->notifyFirstUse(ind); - } - }); - } + 
assert(!lowered_graph->graph().isBuildingPhase()); + + initializeSubgraphIOTensors( + *lowered_graph, (lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) | + ir::Remove::DUPLICATED | ir::Remove::UNDEFINED); - for (auto &tensor_builder : tensor_builders) + // linearize + // This order is just for giving topological order info to the backends + // TODO When we pass a partial graph to a backend, we can remove this + auto order = Linear::linearize(*lowered_graph); + for (auto &pair : backend_contexts) { - tensor_builder->prepare(); + pair.second->genTensors(order, lowered_graph->op_seqs(), *lowered_graph->getLowerInfo()); } prepareMigrantTensors(*lowered_graph); - ExecutionBuilder builder; - - // Generate kernels - lowered_graph->iterateTopolOpSeqs([&](const ir::OpSequenceIndex &op_seq_index, - const ir::OpSequence &op_seq) { - auto lower_info = lowered_graph->getLowerInfo(op_seq_index); - auto kernel_gen = lowered_graph->backend_contexts().at(lower_info->backend())->kernel_gen; - // Set TensorBuilderSet and ExecutorMap to kernel_gen of control flow - auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get()); - if (cf_kernel_gen != nullptr) + // Give some runtime objects to controlflow KernelGenerator + for (auto &pair : backend_contexts) + { + auto cf_context = dynamic_cast<backend::controlflow::BackendContext *>(pair.second.get()); + if (cf_context != nullptr) { - assert(cf_kernel_gen != nullptr); + auto cf_kernel_gen = cf_context->kernel_gen; cf_kernel_gen->setTensorRegistries(tensor_regs); cf_kernel_gen->setExecutorMap(executor_map); } - auto fn_seq = kernel_gen->generate(op_seq); - if (options.he_profiling_mode) - { - fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); - } - builder.append(op_seq_index, {&op_seq, lower_info, std::move(fn_seq)}); - }); - - for (const auto &tensor_builder : tensor_builders) - { - tensor_builder->allocate(); } + ExecutionBuilder builder; + + // Adjust the order of backends for the upcoming iteration + std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts; for (auto &pair : backend_contexts) { - pair.second->initConsts(); + // NOTE The controlflow backend must be processed last. + // This is because the Permute layer is special: it is the only operation that could have + // different ITensor objects for its input and output. And it requires all other backends' + // tensors to be ready to use. 
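// A standalone illustration of the reordering done just below: every backend
// except "controlflow" goes to the front of the deque, so iteration visits the
// controlflow context last (a sketch with plain strings instead of contexts):
#include <deque>
#include <iostream>
#include <string>

int main()
{
  std::deque<std::string> ordered_ids;
  for (std::string id : {"cpu", "controlflow", "acl_cl"})
  {
    if (id == "controlflow")
      ordered_ids.emplace_back(std::move(id));
    else
      ordered_ids.emplace_front(std::move(id));
  }
  for (const auto &id : ordered_ids)
    std::cout << id << ' '; // prints: acl_cl cpu controlflow
  std::cout << '\n';
  return 0;
}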
+ if (pair.first->config()->id() == "controlflow") + ordered_contexts.emplace_back(pair.first, pair.second.get()); + else + ordered_contexts.emplace_front(pair.first, pair.second.get()); } - lowered_graph->graph().operands().iterate( - [](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - auto code_map = builder.releaseCodeMap(); - - for (auto &it : code_map) + // Generate kernels + for (auto &pair : ordered_contexts) { - auto op_seq_index = it.first; - auto &fn_seq = it.second.fn_seq; - - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - auto backend = lowered_graph->getLowerInfo(op_seq_index)->backend(); - auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; - tensor_builder->postFunctionPrepare(); - }); + auto codes = pair.second->genKernels(order, lowered_graph->op_seqs()); + for (auto &pair : codes) + { + auto &op_seq_ind = pair.first; + auto &fn_seq = pair.second; + auto &op_seq = lowered_graph->op_seqs().at(op_seq_ind); + auto lower_info = lowered_graph->getLowerInfo(op_seq_ind); + if (options.he_profiling_mode) + fn_seq->wrap<SyncFunction>(lower_info->backend()->config()); + builder.append(op_seq_ind, {&op_seq, lower_info, std::move(fn_seq)}); + } } + auto code_map = builder.releaseCodeMap(); + exec::ExecutorBase *exec = nullptr; if (parallel) { - exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors, - tensor_regs, std::move(code_map)}; + exec = new exec::ParallelExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), + options.tracing_ctx}; } else { - auto dataflow_exec = new exec::DataflowExecutor{ - std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, std::move(code_map)}; + auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), tensor_regs, + std::move(code_map), options.tracing_ctx}; if (options.he_profiling_mode) { std::vector<const backend::Backend *> backends; @@ -489,8 +375,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor( if (!options.trace_filepath.empty()) { - std::unique_ptr<exec::IExecutionObserver> ctp = - std::make_unique<exec::ChromeTracingObserver>(options.trace_filepath, exec->graph()); + std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>( + options.trace_filepath, exec->graph(), options.tracing_ctx); exec->addObserver(std::move(ctp)); } diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h index e76b721ea..06dc691db 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.h +++ b/runtime/onert/core/src/compiler/ExecutorFactory.h @@ -46,9 +46,6 @@ private: static void initializeBackendContext(compiler::LoweredGraph *lowered_graph); static void runTensorRegistration(compiler::LoweredGraph *lowered_graph, const std::vector<ir::OpSequenceIndex> &order); - static std::vector<backend::ITensor *> - initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, - const ir::OperandIndexSequence &indices); static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph); static exec::IExecutor * createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc index 30c8f72a5..fdd2a7653 100644 --- a/runtime/onert/core/src/compiler/Linear.cc +++ b/runtime/onert/core/src/compiler/Linear.cc @@ -19,8 +19,6 @@ #include "Linear.h" #include "backend/IConfig.h" -#include 
"backend/IConstantInitializer.h" -#include "backend/ITensorRegister.h" #include "backend/Backend.h" #include "util/logging.h" @@ -62,190 +60,5 @@ void Linear::dump(const compiler::LoweredGraph &lowered_graph, } } -void Linear::planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order) -{ - const auto &graph = lowered_graph.graph(); - ir::OperandIndexMap<std::shared_ptr<backend::ITensorBuilder>> tensor_builder_map; - - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - const auto lower_info = lowered_graph.getLowerInfo(ind); - // TODO Remove if onert doesn't support anymore such as - // GeneratedTests.reshape_quant8_weights_as_inputs - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - !graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - // Unused input of subgraph - // TODO Register unused input as nullptr in tensor_builder - if (lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0 && - graph.getInputs().contains(ind)) - { - VERBOSE(LINEAR) << "Operand #" << ind.value() << " will not be used. no more process." - << std::endl; - return; - } - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - bool is_const = obj.isConstant(); - if (is_const) - { - constants.append(ind); - } - - auto factor = lower_info->def_factors().getOnlyElement(); - auto backend = factor.backend(); - auto tensor_builder = lowered_graph.backend_contexts().at(backend)->tensor_builder; - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any op_seq (No use and def) - const auto info = obj.info(); - const auto backend_layout = factor.layout(); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, backend_layout); - } - - tensor_builder_map[ind] = tensor_builder; - }); - - const auto io_tensors = - (graph.getInputs() + graph.getOutputs()) | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // If a tensor is model output, increase the use of the tensor. - // This aim is same to above one. - for (const auto &ind : io_tensors) - { - uses_map[ind]++; - } - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor. - // It makes the tensor not be dealloced. It means these will be deallocated last. - // And allocate constant operands first - VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - - // Allocate Model's inputs - VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl; - for (const auto &ind : graph.getInputs() | ir::Remove::DUPLICATED) - { - auto tensor_builder = tensor_builder_map[ind]; - if (!tensor_builder) // for GeneratedTests.xxx_weights_as_inputs - continue; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. 
Decrease the USE and deallocate if the USE is 0 - VERBOSE(LINEAR) << "TENSORS" << std::endl; - for (const auto op_seq_ind : order) - { - const auto &op_seq = lowered_graph.op_seqs().at(op_seq_ind); - for (const auto &op_idx : op_seq.operations()) - { - for (const auto &ind : graph.operations().at(op_idx).getOutputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - const auto &operand = graph.operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(lowered_graph.getLowerInfo(ind)->def_factors().size() == 1 && - lowered_graph.getLowerInfo(ind)->use_factors().size() == 1); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder_map[ind]->notifyFirstUse(ind); - } - } - - for (const auto &ind : graph.operations().at(op_idx).getInputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED) - { - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder_map[ind]->notifyLastUse(ind); - - // plan for deallocation of dynamic tensor - auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager(); - if (dyn_tensor_manager) - { - const auto *backend = - lowered_graph.getLowerInfo(ind)->def_factors().getOnlyElement().backend(); - auto &tensor_registry = lowered_graph.backend_contexts().at(backend)->tensor_registry; - auto *tensor = tensor_registry->getITensor(ind); - assert(tensor); - if (!io_tensors.contains(ind)) // I/O tensors cannot be deallocated - dyn_tensor_manager->planDealloc(op_idx, tensor); - } - } - } - } - } - - // Dispose and validate - for (const auto &ind : io_tensors) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder_map[ind]->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h index 1e24cf92b..56b42ccb0 100644 --- a/runtime/onert/core/src/compiler/Linear.h +++ b/runtime/onert/core/src/compiler/Linear.h @@ -22,7 +22,6 @@ #include "ir/OpSequences.h" #include "ir/Index.h" -#include "backend/ITensorBuilder.h" #include "compiler/LoweredGraph.h" namespace onert @@ -44,8 +43,6 @@ public: static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph); static void dump(const compiler::LoweredGraph &lowered_graph, 
const std::vector<ir::OpSequenceIndex> &order); - static void planTensors(const compiler::LoweredGraph &lowered_graph, - const std::vector<ir::OpSequenceIndex> &order); }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc index 673d7d3e8..6d5210dc5 100644 --- a/runtime/onert/core/src/compiler/LoweredGraph.cc +++ b/runtime/onert/core/src/compiler/LoweredGraph.cc @@ -32,6 +32,7 @@ #include "compiler/BackendResolver.h" #include "compiler/ManualScheduler.h" #include "compiler/HEScheduler.h" +#include "util/TracingCtx.h" namespace onert { @@ -40,6 +41,13 @@ namespace compiler LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph} { + // set tracing_ctx for copied graph + if (options.tracing_ctx) + { + auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph); + options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value()); + } + bool linear_executor = (options.executor == "Linear"); // Build backend contexts @@ -112,7 +120,7 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option .run(); // Set LowerInfo for each operand from the operand::LowerInfo holder - manipulateLowerInfo(operands_lower_info, options.is_primary_subgraph); + manipulateLowerInfo(operands_lower_info); dumpLowerInfo(); } @@ -126,7 +134,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &option // Optimization passes pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run(); - VERBOSE(OpSequences) << "Dump after permutation insertion" << std::endl; + VERBOSE(LoweredGraph) << "Dump after permutation insertion" << std::endl; + for (auto operand : _graph.getInputs()) + VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl; + for (auto operand : _graph.getOutputs()) + VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl; dumpOpSequences(_op_seqs, _graph.operations()); // Graph verifications @@ -322,50 +334,22 @@ void LoweredGraph::makeOpSequences( } void LoweredGraph::manipulateLowerInfo( - ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, - bool is_primary) + ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info) { const auto controlflow_backend = BackendManager::get().getControlflow(); - // TODO Rather than handling primary graph specially, - // let the permute inserted and remove it later - if (is_primary) + // TODO Rather than using NHWC Get frontend layout of this node from IR + auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; + for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) { - // TODO Rather than using NHWC Get frontend layout of this node from IR - auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC}; - for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - assert(lower_info->def_factors().empty()); - lower_info->addDefPermuteFactor(factor); - } - for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - lower_info->addUsePermuteFactor(factor); - } + auto &&lower_info = operands_lower_info.at(index); + assert(lower_info->def_factors().empty()); + lower_info->addDefPermuteFactor(factor); } - else + for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) { - for (auto index : _graph.getInputs() | 
ir::Remove::UNDEFINED) - { - auto &&lower_info = operands_lower_info.at(index); - if (!(lower_info->def_factors().size() == 0 && lower_info->use_factors().size() == 0)) - { - // In case of not that Graph's input is not used in any operation and not the graph's - // output. - // In other words, it is not unused input in Graph. - lower_info->addDefPermuteFactor(*lower_info->use_factors().begin()); - } - else - { - // In case of that an operand is Graph's input and not input or output of any operation - lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{ - controlflow_backend, - ir::Layout::NHWC // TODO Get frontend layout of this node from IR - }); - } - } + auto &&lower_info = operands_lower_info.at(index); + lower_info->addUsePermuteFactor(factor); } for (auto index : _graph.getOutputs() | ir::Remove::UNDEFINED) { @@ -446,8 +430,11 @@ void LoweredGraph::dumpLowerInfo() sstream << (shape.dim(i)) << " "; } sstream << "}" << std::endl; - sstream << " - Def ir::Operations : " << def_ops << std::endl; - sstream << " - Use ir::Operations : " << use_ops << std::endl; + sstream << " - Def Operations : " << def_ops << std::endl; + sstream << " - Use Operations : " << use_ops << std::endl; + sstream << " - Data : " + << (object.data() ? (std::to_string(object.data()->size()) + " bytes") : "N/A") + << std::endl; sstream << " - Lower Info" << std::endl; sstream << " - Def Backends : " << def_layouts << std::endl; sstream << " - Use Backends : " << use_layouts << std::endl; diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc index ed49ee56f..1f4a47864 100644 --- a/runtime/onert/core/src/compiler/ManualScheduler.cc +++ b/runtime/onert/core/src/compiler/ManualScheduler.cc @@ -100,10 +100,11 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap } // Dump final assignment - backend_resolver->iterate([&](const ir::OperationIndex &index, const backend::Backend &backend) { - VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " - << backend.config()->id() << std::endl; - }); + WHEN_LOG_ENABLED(backend_resolver->iterate( + [&](const ir::OperationIndex &index, const backend::Backend &backend) { + VERBOSE(ManualScheduler) << "backend for operation #" << index.value() << ": " + << backend.config()->id() << std::endl; + })); return backend_resolver; } diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc index c18178da9..e0c9f5283 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.cc +++ b/runtime/onert/core/src/compiler/ShapeValidator.cc @@ -37,7 +37,7 @@ namespace compiler { ShapeValidator::ShapeValidator(const ir::Graph &graph) - : _graph{graph}, _ctx{graph.operands()}, _current_op_seq_layout{ir::Layout::UNKNOWN} + : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN} { } @@ -59,7 +59,7 @@ void ShapeValidator::operator()() // creating Compiler assert(_graph.subgraphs() == nullptr); - _current_op_seq_layout = _graph.layout(); + _current_layout = _graph.layout(); _graph.operations().iterate( [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); }); @@ -90,7 +90,7 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = 
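// NOTE The CROPS_DATA requirements added just below expect the optional crops input
// to be shaped [rank(input) - 2, 2], i.e. one (begin, end) crop pair per spatial
// dimension. For the common 4D NHWC input that is a 2x2 tensor laid out as
// crops = [[top, bottom], [left, right]] (the pair names here are illustrative,
// not onert identifiers).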
_ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); @@ -101,6 +101,14 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node) OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2); + if (node.getInputs().size() != 2) + { + const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)}; + OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2); + OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2)); + OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2); + } + OP_REQUIRES(input_shape.C == output_shape.C); } @@ -330,7 +338,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); @@ -355,7 +363,7 @@ void ShapeValidator::visit(const ir::operation::SpaceToDepth &node) const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); const auto block_size = node.param().block_size; @@ -471,7 +479,7 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node) OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout); // The kernel has only IHWO layout on frontend @@ -516,7 +524,7 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node) const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)}; - const auto frontend_layout = _current_op_seq_layout; + const auto frontend_layout = _current_layout; const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout); const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout); diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h index f40c098d5..763cf7ce3 100644 --- a/runtime/onert/core/src/compiler/ShapeValidator.h +++ b/runtime/onert/core/src/compiler/ShapeValidator.h @@ -93,7 +93,7 @@ private: // TODO Remove _ctx field const ir::Graph &_graph; const ir::Operands &_ctx; - ir::Layout _current_op_seq_layout; + ir::Layout _current_layout; }; } // namespace compiler diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index d3b083b78..1f2c6f3b9 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -142,12 +142,12 @@ void StaticShapeInferer::dump() } } -void StaticShapeInferer::visit(const 
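// NOTE Below, the ArgMax-only visitor is generalized to ArgMinMax, and the renamed
// inferArgMinMaxShape() yields the input shape with the reduced axis removed.
// A standalone sketch of that rule, using a plain int vector in place of ir::Shape:
#include <vector>

// e.g. {2, 3, 4} with axis = 1 becomes {2, 4}
inline std::vector<int> argMinMaxShapeSketch(const std::vector<int> &in, int axis)
{
  std::vector<int> out;
  for (int i = 0; i < static_cast<int>(in.size()); ++i)
    if (i != axis)
      out.push_back(in[i]); // keep every dimension except the reduced axis
  return out;
}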
ir::operation::ArgMax &op) +void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; const auto &input = _operands.at(input_idx); - const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto &axis = _operands.at(axis_idx); // get mutable output operand @@ -166,7 +166,8 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op) axis_value = axis_value < 0 ? axis_value + rank : axis_value; // re-sizing output shape - ir::Shape new_shape = shape_inference::inferArgMaxShape(input.info().shape(), axis_value, rank); + ir::Shape new_shape = + shape_inference::inferArgMinMaxShape(input.info().shape(), axis_value, rank); output.info().shape(new_shape); } @@ -335,35 +336,47 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op) // even when axis is constant, output shape should be recalculated since user might call // nnfw_set_input_tensorinfo(input, some_new_shape) - auto axis_buf = reinterpret_cast<const int32_t *>(axis.data()->base()); - assert(axis_buf); + auto axis_type = axis.typeInfo().type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); + + assert(axis.data()->base()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? reinterpret_cast<const int32_t *>(axis.data()->base())[0] + : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis.data()->base())[0]); // re-sizing output shape - ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_buf[0]); + ir::Shape new_shape = shape_inference::inferExpandDimsShape(input.info().shape(), axis_value); output.info().shape(new_shape); } void StaticShapeInferer::visit(const ir::operation::Fill &op) { - const auto input_idx{op.getInputs().at(ir::operation::Fill::Input::INPUT)}; - const auto &input = _operands.at(input_idx); + const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)}; + const auto &shape = _operands.at(shape_idx); const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (!input.isConstant()) + if (!shape.isConstant()) { output.info().setDynamic(); _return_has_dynamic_tensor = true; return; } - assert(input.typeInfo().type() == ir::DataType::INT32); + const auto dims_type = shape.typeInfo().type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast<const int32_t *>(input.data()->base()); - assert(input_buf); + auto dims_buf = shape.data()->base(); + assert(dims_buf); + + const auto &dims_shape = shape.info().shape(); + auto new_shape = ((dims_type == ir::DataType::INT32) + ? shape_inference::inferFillShape<int32_t>( + dims_shape, reinterpret_cast<const int32_t *>(dims_buf)) + : shape_inference::inferFillShape<int64_t>( + dims_shape, reinterpret_cast<const int64_t *>(dims_buf))); - // re-sizing output shape - ir::Shape new_shape = shape_inference::inferFillShape(input.info().shape(), input_buf); output.info().shape(new_shape); } diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h deleted file mode 100644 index 3b0360b4b..000000000 --- a/runtime/onert/core/src/compiler/TensorBuilders.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_COMPILER_TENSOR_BUILDERS_H__ -#define __ONERT_COMPILER_TENSOR_BUILDERS_H__ - -#include <unordered_set> -#include <memory> -#include "backend/BackendContext.h" -#include "backend/Backend.h" -#include "backend/controlflow/Config.h" -#include "backend/controlflow/TensorBuilder.h" -#include "util/logging.h" - -namespace onert -{ -namespace compiler -{ - -class TensorBuilders -{ -public: - TensorBuilders() = default; - - TensorBuilders(const onert::backend::BackendContexts &backend_contexts, bool include_controlflow) - { - for (const auto &e : backend_contexts) - { - if (e.first->config()->id() == backend::controlflow::Config::ID) - { - _cf_tensor_builder = std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>( - e.second->tensor_builder); - if (include_controlflow) - _tensor_builders.insert(e.second->tensor_builder); - } - else - { - _tensor_builders.insert(e.second->tensor_builder); - } - } - } - - std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator begin() const - { - return _tensor_builders.cbegin(); - } - std::unordered_set<std::shared_ptr<onert::backend::ITensorBuilder>>::const_iterator end() const - { - return _tensor_builders.cend(); - } - - std::shared_ptr<backend::controlflow::TensorBuilder> getControlflowTensorBuilder() const - { - return _cf_tensor_builder; - } - -private: - std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders; - std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder; -}; - -} // namespace compiler -} // namespace onert - -#endif // __ONERT_COMPILER_TENSOR_BUILDERS_H__ diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index c83a72ada..8467d51c8 100644 --- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -130,9 +130,11 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde // Generate output operand and permute operation auto out_operand_index = _graph.addOperand(operand.shape(), operand.typeInfo()); - // change model output if operand_index is model output index + // change model output if operand_index is model output index and the out operand is controlflow + // backend auto &model_outputs = _graph.getOutputs(); - if (model_outputs.contains(operand_index)) + const backend::Backend *cf_backend = compiler::BackendManager::get().getControlflow(); + if (model_outputs.contains(operand_index) && factor.backend() == cf_backend) { model_outputs.replace(operand_index, out_operand_index); } @@ -191,8 +193,10 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde const auto &node = _graph.operations().at(node_index); VERBOSE_F() << "Permute Op inserted, node index : " << node_index << std::endl; - VERBOSE_F() << " - Input (original) Operand : " << 
operand_index << std::endl; - VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << std::endl; + VERBOSE_F() << " - Input (original) Operand : " << operand_index << "(" + << input_factor.backend()->config()->id() << ")" << std::endl; + VERBOSE_F() << " - Output(inserted) Operand : " << out_operand_index << "(" + << factor.backend()->config()->id() << ")" << std::endl; // OpSequence { diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index 53bc3c204..b81a75794 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -78,11 +78,10 @@ bool DataflowExecutor::noWaitingJobs() } DataflowExecutor::DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, const compiler::TensorRegistries &tensor_regs, - compiler::CodeMap &&code_map) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs}, + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx}, _code_map{std::move(code_map)} { VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; @@ -143,7 +142,9 @@ void DataflowExecutor::executeImpl() } assert(!_ready_jobs.empty()); // Cannot begin if there is no initial jobs - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); while (!_ready_jobs.empty()) { @@ -157,7 +158,7 @@ void DataflowExecutor::executeImpl() const backend::Backend *backend = _lowered_graph->getLowerInfo()->op_seq.at(op_seq_index)->backend(); - _subject.notifyJobBegin(this, op_seq, backend); + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); job->fn_seq()->initRunning(); @@ -167,13 +168,13 @@ void DataflowExecutor::executeImpl() job->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); notify(job_index); _finished_jobs[job_index] = std::move(job); } assert(noWaitingJobs()); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index 69dfda15c..b72c0d030 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -28,6 +28,7 @@ #include <memory> #include "exec/ExecutorBase.h" #include "compiler/CodeMap.h" +#include "util/TracingCtx.h" namespace onert { @@ -50,9 +51,8 @@ public: * @param code_map OpSequence and its code map */ DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc index 1666d3f08..2d9d534f1 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc +++ 
b/runtime/onert/core/src/exec/DynamicShapeInferer.cc @@ -92,12 +92,12 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) +void DynamicShapeInferer::visit(const ir::operation::ArgMinMax &op) { - const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; + const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)}; const auto input = _tensor_registry->getITensor(input_idx); - const auto axis_idx{op.getInputs().at(ir::operation::ArgMax::Input::AXIS)}; + const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)}; const auto axis = _tensor_registry->getITensor(axis_idx); auto output_ind = op.getOutputs().at(0); @@ -111,7 +111,7 @@ void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) const auto rank = input_shape.rank(); axis_value = axis_value < 0 ? axis_value + rank : axis_value; - ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis_value, rank); + ir::Shape new_shape = shape_inference::inferArgMinMaxShape(input_shape, axis_value, rank); output->applyShape(new_shape); assert(output->buffer() != nullptr); @@ -388,10 +388,16 @@ void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op) auto axis_ind = op.getInputs().at(ir::operation::ExpandDims::AXIS); auto axis = _tensor_registry->getITensor(axis_ind); - auto axis_buf = reinterpret_cast<const int32_t *>(axis->buffer()); - assert(axis_buf); + auto axis_type = axis->data_type(); + assert(axis_type == ir::DataType::INT32 || axis_type == ir::DataType::INT64); - auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]); + assert(axis->buffer()); + int32_t axis_value = + (axis_type == ir::DataType::INT32) + ? reinterpret_cast<const int32_t *>(axis->buffer())[0] + : static_cast<int32_t>(reinterpret_cast<const int64_t *>(axis->buffer())[0]); + + auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_value); output->applyShape(output_shape); assert(output->buffer() != nullptr); @@ -402,19 +408,24 @@ void DynamicShapeInferer::visit(const ir::operation::Fill &op) // check if output is not dynamic auto output_ind = op.getOutputs().at(0); auto output = _tensor_registry->getITensor(output_ind); - auto input_ind = op.getInputs().at(ir::operation::Fill::Input::INPUT); - auto input = _tensor_registry->getITensor(input_ind); - ir::Shape input_shape = input->getShape(); + auto shape_ind = op.getInputs().at(ir::operation::Fill::Input::SHAPE); + auto shape = _tensor_registry->getITensor(shape_ind); - if ((!input->is_dynamic()) && (!output->is_dynamic())) + if ((!shape->is_dynamic()) && (!output->is_dynamic())) return; - assert(input->data_type() == ir::DataType::INT32); + const auto dims_type = shape->data_type(); + assert(dims_type == ir::DataType::INT32 || dims_type == ir::DataType::INT64); - auto input_buf = reinterpret_cast<const int32_t *>(input->buffer()); - assert(input_buf); + auto dims_buf = shape->buffer(); + assert(dims_buf); - auto output_shape = shape_inference::inferFillShape(input_shape, input_buf); + const auto &dims_shape = shape->getShape(); + auto output_shape = ((dims_type == ir::DataType::INT32) + ? 
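// NOTE inferFillShape() is now templated on the element type of Fill's SHAPE input,
// since the dims tensor may hold INT32 or INT64 values. A minimal sketch of what
// such a template has to do, assuming simplified types (a plain vector instead of
// ir::Shape, and an explicit element count instead of the dims tensor's shape):
#include <cstdint>
#include <vector>

template <typename T>
std::vector<int32_t> fillShapeSketch(int64_t num_dims, const T *dims_buf)
{
  std::vector<int32_t> out_shape;
  for (int64_t i = 0; i < num_dims; ++i)
    out_shape.push_back(static_cast<int32_t>(dims_buf[i])); // narrows INT64 dims
  return out_shape;
}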
shape_inference::inferFillShape<int32_t>( + dims_shape, reinterpret_cast<const int32_t *>(dims_buf)) + : shape_inference::inferFillShape<int64_t>( + dims_shape, reinterpret_cast<const int64_t *>(dims_buf))); output->applyShape(output_shape); assert(output->buffer() != nullptr); diff --git a/runtime/onert/core/src/exec/ExecTime.h b/runtime/onert/core/src/exec/ExecTime.h index 846d0930b..d2ddbad34 100644 --- a/runtime/onert/core/src/exec/ExecTime.h +++ b/runtime/onert/core/src/exec/ExecTime.h @@ -94,7 +94,7 @@ public: /** * @brief Update metrics file with new data. */ - void uploadOperationsExecTime() const { _json.uploadOperationsExecTime(); } + void storeOperationsExecTime() const { _json.storeOperationsExecTime(); } static const int64_t NOT_FOUND = -1; private: diff --git a/runtime/onert/core/src/exec/ExecutionObservee.cc b/runtime/onert/core/src/exec/ExecutionObservee.cc index ddb1fb6a0..d5003b126 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.cc +++ b/runtime/onert/core/src/exec/ExecutionObservee.cc @@ -26,37 +26,38 @@ void ExecutionObservee::add(std::unique_ptr<IExecutionObserver> observer) _observers.emplace_back(std::move(observer)); } -void ExecutionObservee::notifyModelBegin(IExecutor *executor) +void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind) { for (auto &o : _observers) { - o->handleBegin(executor); + o->handleSubgraphBegin(ind); } } -void ExecutionObservee::notifyModelEnd(IExecutor *executor) +void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind) { for (auto &o : _observers) { - o->handleEnd(executor); + o->handleSubgraphEnd(ind); } } -void ExecutionObservee::notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, +void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, + const ir::OpSequence *op_seq, const backend::Backend *backend) { for (auto &o : _observers) { - o->handleBegin(executor, op_seq, backend); + o->handleJobBegin(executor, index, op_seq, backend); } } -void ExecutionObservee::notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, + const ir::OpSequence *op_seq, const backend::Backend *backend) { for (auto &o : _observers) { - o->handleEnd(executor, op_seq, backend); + o->handleJobEnd(executor, index, op_seq, backend); } } diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h index 49d409a3a..62b3f6201 100644 --- a/runtime/onert/core/src/exec/ExecutionObservee.h +++ b/runtime/onert/core/src/exec/ExecutionObservee.h @@ -20,6 +20,7 @@ #include <list> #include "exec/ExecutionObservers.h" +#include "ir/Index.h" namespace onert { @@ -39,11 +40,11 @@ public: * @param observer Observer to be added */ void add(std::unique_ptr<IExecutionObserver> observer); - void notifyModelBegin(IExecutor *executor); - void notifyModelEnd(IExecutor *executor); - void notifyJobBegin(IExecutor *executor, const ir::OpSequence *op_seq, + void notifySubgraphBegin(ir::SubgraphIndex ind); + void notifySubgraphEnd(ir::SubgraphIndex ind); + void notifyJobBegin(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq, const backend::Backend *backend); - void notifyJobEnd(IExecutor *executor, const ir::OpSequence *op_seq, + void notifyJobEnd(IExecutor *executor, ir::SubgraphIndex index, const ir::OpSequence *op_seq, const backend::Backend *backend); private: diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc 
b/runtime/onert/core/src/exec/ExecutionObservers.cc index 066b52ee1..18c0c1dd3 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.cc +++ b/runtime/onert/core/src/exec/ExecutionObservers.cc @@ -17,12 +17,62 @@ #include "exec/ExecutionObservers.h" #include <string> +#include <sstream> #include "util/logging.h" #include "exec/IExecutor.h" #include "misc/polymorphic_downcast.h" #include "ir/OpSequence.h" #include "util/EventWriter.h" +#include "util/Utils.h" + +namespace +{ + +void setUserData(const onert::ir::Graph &g, const onert::ir::OpSequence *op_seq, + decltype(EventCollector::Event::userData) &data) +{ + if (op_seq->size() == 0) + return; + + // From a tensor of shape [a, b, c], this will return a string "shape(a b c)". + // String like "[1, 2, 3]" looks better but this will be considered as a list in Json + // so text search (e.g., Ctrl-F in Chrome Tracing) could be difficult + auto build_shape_str = [&](onert::ir::OperandIndex operand_idx) { + std::string shape_str; + auto &shape = g.operands().at(operand_idx).info().shape(); + for (int i = 0; i < shape.rank(); i++) + { + if (i == 0) + shape_str = "shape(" + std::to_string(shape.dim(i)); + else + shape_str += " " + std::to_string(shape.dim(i)); + } + shape_str += ")"; + + return shape_str; + }; + + const auto &first_op_idx = op_seq->operations().at(0); + const auto &first_op_node = g.operations().at(first_op_idx); + + auto &inputs = first_op_node.getInputs(); + auto size = inputs.size(); + for (size_t i = 0; i < size; i++) + { + auto operand_idx = inputs.at(i); + if (operand_idx.undefined()) + continue; + + std::string key("input_shape_" + std::to_string(i)); + std::string value = build_shape_str(operand_idx); + data.emplace_back(std::make_pair(key, value)); + } + + // add other userData as needed +} + +} // namespace namespace onert { @@ -30,8 +80,8 @@ namespace onert namespace exec { -void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence *, - const onert::backend::Backend *backend) +void ProfileObserver::handleJobBegin(onert::exec::IExecutor *, ir::SubgraphIndex, + const ir::OpSequence *, const onert::backend::Backend *backend) { _timer = backend->config()->timer(); if (_timer == nullptr) @@ -39,8 +89,8 @@ void ProfileObserver::handleBegin(onert::exec::IExecutor *, const ir::OpSequence _timer->handleBegin(); } -void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void ProfileObserver::handleJobEnd(IExecutor *exec, ir::SubgraphIndex, const ir::OpSequence *op_seq, + const backend::Backend *backend) { _timer->handleEnd(); const auto timer_res = _timer->getTime(); @@ -70,51 +120,74 @@ void ProfileObserver::handleEnd(IExecutor *exec, const ir::OpSequence *op_seq, } }; -ChromeTracingObserver::ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph) - : _base_filepath(filepath), _recorder{}, _collector{&_recorder}, _graph{graph} +TracingObserver::TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx) + : _recorder{std::make_unique<EventRecorder>()}, _collector{_recorder.get()}, _graph{graph}, + _tracing_ctx{tracing_ctx} { + // TODO Remove below after using _tracing_ctx + UNUSED_RELEASE(_tracing_ctx); + + _event_writer = EventWriter::get(filepath); + _event_writer->startToUse(); } -ChromeTracingObserver::~ChromeTracingObserver() +TracingObserver::~TracingObserver() { try { - EventWriter{_recorder}.writeToFiles(_base_filepath); + 
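// NOTE Worked example for the setUserData() helper added above: if the first
// operation of an op sequence has inputs shaped {1, 224, 224, 3} and {64}, the
// event's userData becomes ("input_shape_0", "shape(1 224 224 3)") and
// ("input_shape_1", "shape(64)"). The space-separated "shape(...)" form is used
// instead of "[1, 224, 224, 3]" so the value stays a plain searchable string
// rather than being parsed as a JSON list.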
_event_writer->readyToFlush(std::move(_recorder)); } catch (const std::exception &e) { - std::cerr << "E: Fail to record event in ChromeTracingObserver: " << e.what() << std::endl; + std::cerr << "E: Fail to record event in TracingObserver: " << e.what() << std::endl; } } -void ChromeTracingObserver::handleBegin(IExecutor *) +void TracingObserver::handleSubgraphBegin(ir::SubgraphIndex subg_ind) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "runtime", "Graph"}); } -void ChromeTracingObserver::handleBegin(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobBegin(IExecutor *, ir::SubgraphIndex subg_ind, + const ir::OpSequence *op_seq, const backend::Backend *backend) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); + std::string backend_id = backend->config()->id(); - _collector.onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, - opSequenceTag(op_seq, _graph.operations())}); + + auto ev = EventCollector::Event{EventCollector::Edge::BEGIN, backend_id, + opSequenceTag(op_seq, _graph.operations())}; + // add shape of inputs + setUserData(_graph, op_seq, ev.userData); + + _collector.onEvent(ev); } -void ChromeTracingObserver::handleEnd(IExecutor *, const ir::OpSequence *op_seq, - const backend::Backend *backend) +void TracingObserver::handleJobEnd(IExecutor *, ir::SubgraphIndex subg_ind, + const ir::OpSequence *op_seq, const backend::Backend *backend) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); + std::string backend_id = backend->config()->id(); _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, backend_id, opSequenceTag(op_seq, _graph.operations())}); } -void ChromeTracingObserver::handleEnd(IExecutor *) +void TracingObserver::handleSubgraphEnd(ir::SubgraphIndex subg_ind) { + // TODO Write subg_ind into profiling result + UNUSED_RELEASE(subg_ind); + _collector.onEvent(EventCollector::Event{EventCollector::Edge::END, "runtime", "Graph"}); } -std::string ChromeTracingObserver::opSequenceTag(const ir::OpSequence *op_seq, - const ir::Operations &operations) +std::string TracingObserver::opSequenceTag(const ir::OpSequence *op_seq, + const ir::Operations &operations) { if (op_seq->size() == 0) return "Empty OpSequence"; diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h index f8c2acca5..a9eebfee1 100644 --- a/runtime/onert/core/src/exec/ExecutionObservers.h +++ b/runtime/onert/core/src/exec/ExecutionObservers.h @@ -18,12 +18,16 @@ #define __ONERT_EXEC_OBSREVERS_H__ #include "exec/IFunction.h" +#include "ir/Index.h" #include "ir/OpSequence.h" #include "ExecTime.h" #include "util/ITimer.h" #include "exec/IExecutor.h" #include "util/EventCollector.h" #include "util/EventRecorder.h" +#include "util/EventWriter.h" +#include "util/TracingCtx.h" +#include "util/EventWriter.h" namespace onert { @@ -33,13 +37,15 @@ class IExecutionObserver { public: /// @brief Invoked just before model (not individual operation) execution begins - virtual void handleBegin(IExecutor *) { return; } + virtual void handleSubgraphBegin(ir::SubgraphIndex) { return; } - virtual void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; - virtual void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) = 0; + virtual void handleJobBegin(IExecutor *, ir::SubgraphIndex, const
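// NOTE Observer callbacks are now keyed by ir::SubgraphIndex rather than only the
// executor object, so one observer can tell apart events from different subgraphs.
// A sketch of a custom per-job timer in the spirit of this interface, with the
// onert-specific parameters omitted (the real overrides also receive IExecutor *,
// ir::SubgraphIndex, const ir::OpSequence * and const backend::Backend *):
#include <chrono>
#include <iostream>

class JobTimerSketch
{
public:
  void handleJobBegin() { _start = std::chrono::steady_clock::now(); }
  void handleJobEnd()
  {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::steady_clock::now() - _start)
                .count();
    std::cout << "job took " << us << " us" << std::endl;
  }

private:
  std::chrono::steady_clock::time_point _start;
};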
ir::OpSequence *, + const backend::Backend *) = 0; + virtual void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) = 0; /// @brief Invoked just after model (not individual operation) execution ends - virtual void handleEnd(IExecutor *) { return; } + virtual void handleSubgraphEnd(ir::SubgraphIndex) { return; } virtual ~IExecutionObserver() = default; }; @@ -51,10 +57,12 @@ public: : _et(std::move(et)), _graph(graph) { } - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; - void handleEnd(IExecutor *) override { _et->uploadOperationsExecTime(); } + void handleSubgraphEnd(ir::SubgraphIndex) override { _et->storeOperationsExecTime(); } private: std::unique_ptr<util::ITimer> _timer; @@ -62,24 +70,28 @@ private: const ir::Graph &_graph; }; -class ChromeTracingObserver : public IExecutionObserver +class TracingObserver : public IExecutionObserver { public: - ChromeTracingObserver(const std::string &filepath, const ir::Graph &graph); - ~ChromeTracingObserver(); - void handleBegin(IExecutor *) override; - void handleBegin(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *, const ir::OpSequence *, const backend::Backend *) override; - void handleEnd(IExecutor *) override; + TracingObserver(const std::string &filepath, const ir::Graph &graph, + const util::TracingCtx *tracing_ctx); + ~TracingObserver(); + void handleSubgraphBegin(ir::SubgraphIndex) override; + void handleJobBegin(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleJobEnd(IExecutor *, ir::SubgraphIndex, const ir::OpSequence *, + const backend::Backend *) override; + void handleSubgraphEnd(ir::SubgraphIndex) override; private: static std::string opSequenceTag(const ir::OpSequence *op_seq, const ir::Operations &operations); private: - const std::string &_base_filepath; - EventRecorder _recorder; + std::unique_ptr<EventRecorder> _recorder; EventCollector _collector; const ir::Graph &_graph; + EventWriter *_event_writer; + const util::TracingCtx *_tracing_ctx; }; } // namespace exec diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index 018a0bba0..588a3258d 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -15,11 +15,11 @@ */ #include "ExecutorBase.h" +#include "ShapeConverter.h" -#include "backend/ITensor.h" #include "backend/controlflow/UserTensor.h" -#include "backend/cpu_common/Tensor.h" #include "util/logging.h" +#include "misc/polymorphic_downcast.h" namespace onert { @@ -27,43 +27,27 @@ namespace exec { ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs) - : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex() + const compiler::TensorRegistries &tensor_regs, + const util::TracingCtx *tracing_ctx) + : 
_lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, _mutex(), + _tracing_ctx(tracing_ctx) { - // TODO Fix the way of knowing whether it is primary or not - bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty()); - if (!primary_executor) - { - auto build_input_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector<backend::ITensor *> list; - for (auto ind : ind_seq) - { - backend::ITensor *tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - list.push_back(tensor); - } - return list; - }; - auto build_output_tensor_list = [&](const onert::ir::OperandIndexSequence &ind_seq) { - std::vector<backend::ITensor *> list; - for (auto ind : ind_seq) - { - backend::ITensor *tensor = tensor_regs.getITensor(ind); - assert(tensor != nullptr); - list.push_back(tensor); - } - return list; - }; - _input_tensors = build_input_tensor_list(_graph.getInputs()); - _output_tensors = build_output_tensor_list(_graph.getOutputs()); - } + auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) { + assert(tensors.empty()); + for (auto ind : ind_seq) + { + backend::ITensor *tensor = tensor_regs.getITensor(ind); + assert(tensor != nullptr); + auto io_tensor = nnfw::misc::polymorphic_downcast<backend::controlflow::IOTensor *>(tensor); + tensors.push_back(io_tensor); + } + }; + build_tensor_list(_graph.getInputs(), _input_tensors); + build_tensor_list(_graph.getOutputs(), _output_tensors); } -void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors, - const std::shared_ptr<IPermuteFunction> &pre_fn) +void ExecutorBase::execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) { // For thread-safe, use mutex // TODO: if all used backends on this executor are thread-safe, @@ -71,31 +55,37 @@ void ExecutorBase::execute(const std::vector<backend::ITensor *> &src_tensors, // Deadlock occurs when an Executor is called recursively. std::lock_guard<std::mutex> lock(_mutex); - assert(src_tensors.size() == _graph.getInputs().size()); - assert(src_tensors.size() == _input_tensors.size()); - for (uint32_t n = 0; n < _graph.getInputs().size(); ++n) + assert(inputs.size() == _graph.getInputs().size()); + assert(inputs.size() == _input_tensors.size()); + for (uint32_t n = 0; n < inputs.size(); ++n) { - // when user changes input shape, the input tensor is dynamic and its memory is not allocated. - // This code find the info to allocate dynamic tensor, and allocate memory based on the source - // tensor's shape set by caller. 
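// NOTE The new execute() below marks an input tensor as dynamic whenever the shape
// supplied by the caller differs from the shape the model was compiled with.
// The decision rule as a standalone sketch, using plain vectors for shapes and
// ignoring the layout conversion performed by convertShape():
#include <vector>

inline bool needsDynamicInput(const std::vector<int> &compiled_shape,
                              const std::vector<int> &caller_shape)
{
  // A mismatch invalidates the static memory plan, so the tensor must be
  // switched to dynamic allocation before running.
  return compiled_shape != caller_shape;
}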
- const auto src_tensor = src_tensors[n]; + const auto input = inputs[n]; + assert(input->buffer() != nullptr); auto input_tensor = _input_tensors[n]; - // If src_tensor or input_tensor is nullptr, pre_fn does not copy the tensors - if (src_tensor != nullptr && input_tensor != nullptr) + assert(input_tensor != nullptr); + if (input != nullptr) { - const auto orig_input_shape = input_tensor->getShape(); + const auto orig_input_shape = input_tensor->orig_info().shape(); const auto changed_input_shape = - convertShape(src_tensor->getShape(), src_tensor->layout(), input_tensor->layout()); + convertShape(input->getShape(), input->layout(), input_tensor->orig_layout()); if (orig_input_shape != changed_input_shape) { input_tensor->set_dynamic(); } } + input_tensor->setTensor(input); } - // TODO Move calling permute_fn.run() into executeImpl() - assert(pre_fn); - pre_fn->run(); + assert(outputs.size() == _graph.getOutputs().size()); + assert(outputs.size() == _output_tensors.size()); + for (uint32_t n = 0; n < outputs.size(); ++n) + { + const auto output = outputs[n]; + // assert(dst_tensor->buffer() != nullptr); + auto output_tensor = _output_tensors[n]; + assert(output_tensor != nullptr); + output_tensor->setTensor(output); + } executeImpl(); } @@ -111,19 +101,19 @@ void ExecutorBase::execute(const IODescription &desc) assert(_input_tensors.size() == desc.inputs.size()); for (uint32_t i = 0; i < _input_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_input_tensors[i]); - assert(tensor); + auto tensor = _input_tensors[i]; + + // TODO Check if (desc.inputs[i] == nullptr) + // TODO Better design for ITensor? (we need const_cast as ITensor is writable) + tensor->setUserTensor(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), + desc.inputs[i]->size); + auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i}); if (input_shape != desc.dynamic_input_shapes.end()) { tensor->set_dynamic(); tensor->setShape(input_shape->second); } - // TODO Check if (desc.inputs[i] == nullptr) - // TODO Better design for ITensor? 
(we need const_cast as ITensor is writable) - tensor->setBuffer(static_cast<uint8_t *>(const_cast<void *>(desc.inputs[i]->buffer)), - desc.inputs[i]->size); handleDynamicInputTensor(ir::IOIndex{i}, desc); } @@ -131,13 +121,12 @@ void ExecutorBase::execute(const IODescription &desc) assert(_output_tensors.size() == desc.outputs.size()); for (uint32_t i = 0; i < _output_tensors.size(); ++i) { - // TODO Remove dynamic_cast - auto *tensor = dynamic_cast<backend::controlflow::UserTensor *>(_output_tensors[i]); - assert(tensor); - tensor->set_dynamic(); // It can't be resized but shape could change + auto tensor = _output_tensors[i]; + if (desc.outputs[i] == nullptr) throw std::runtime_error{"Output " + std::to_string(i) + "'s buffer is not set."}; - tensor->setBuffer(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); + tensor->setUserTensor(static_cast<uint8_t *>(desc.outputs[i]->buffer), desc.outputs[i]->size); + tensor->set_dynamic(); // It can't be resized but shape could change } executeImpl(); diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index 8a6ec9174..5d95c10bf 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -17,23 +17,25 @@ #ifndef __ONERT_EXEC_EXECUTOR_BASE_H__ #define __ONERT_EXEC_EXECUTOR_BASE_H__ -#include <mutex> - #include "IPermuteFunction.h" -#include "exec/ExecutionObservers.h" -#include "ShapeConverter.h" #include "exec/IExecutor.h" -#include "compiler/LoweredGraph.h" -#include "ir/LowerInfoMap.h" -#include "backend/IConfig.h" -#include "backend/Backend.h" #include "exec/ExecTime.h" -#include "exec/IFunction.h" -#include "backend/IDynamicTensorManager.h" -#include "backend/ITensorManager.h" #include "exec/ExecutionObservee.h" +#include "exec/IFunction.h" +#include "exec/IODescription.h" +#include "ir/Graph.h" +#include "ir/Index.h" +#include "ir/LowerInfoMap.h" +#include "ir/OperationIndexMap.h" +#include "compiler/LoweredGraph.h" #include "compiler/TensorRegistries.h" -#include <list> +#include "backend/controlflow/IOTensor.h" +#include "util/TracingCtx.h" + +#include <cstdint> +#include <memory> +#include <mutex> +#include <vector> namespace onert { @@ -49,25 +51,17 @@ public: * @param tensor_builders Tensor builders that are currently used */ ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs); + const compiler::TensorRegistries &tensor_regs, const util::TracingCtx *tracing_ctx); virtual ~ExecutorBase() = default; const ir::Graph &graph() final { return _graph; } - /** - * @brief Execute without IODescription - * - * @param src_tensor Tensor list that will be copied to input tensors of this - * @param pre_fn The permutation function that copy from src_tensor to input tensors of this - */ - void execute(const std::vector<backend::ITensor *> &src_tensors, - const std::shared_ptr<IPermuteFunction> &pre_fn); - void execute(const IODescription &desc) final; + void execute(const std::vector<backend::IPortableTensor *> &inputs, + const std::vector<backend::IPortableTensor *> &outputs) override; + // Used only in Dataflow and Parallel Executors void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>> ranks) final { @@ -78,9 +72,10 @@ public: void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); }; - const 
std::vector<backend::ITensor *> &getInputTensors() const { return _input_tensors; } - - const std::vector<backend::ITensor *> &getOutputTensors() const { return _output_tensors; } + const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const override + { + return _output_tensors; + } protected: /** @@ -93,9 +88,10 @@ protected: std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; std::unique_ptr<compiler::LoweredGraph> _lowered_graph; const ir::Graph &_graph; - std::vector<backend::ITensor *> _input_tensors; - std::vector<backend::ITensor *> _output_tensors; + std::vector<backend::controlflow::IOTensor *> _input_tensors; + std::vector<backend::controlflow::IOTensor *> _output_tensors; std::mutex _mutex; + const util::TracingCtx *_tracing_ctx; private: void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc); diff --git a/runtime/onert/core/src/exec/IPermuteFunction.h b/runtime/onert/core/src/exec/IPermuteFunction.h index 11017edc9..8f62156a6 100644 --- a/runtime/onert/core/src/exec/IPermuteFunction.h +++ b/runtime/onert/core/src/exec/IPermuteFunction.h @@ -120,7 +120,8 @@ protected: } assert(src_tensor != dst_tensor); - assert(underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type())); + if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type())) + throw std::runtime_error("data type does not match"); switch (src_tensor->data_type()) { case ir::DataType::FLOAT32: diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc index 72a18def1..b29216a2f 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.cc +++ b/runtime/onert/core/src/exec/JSONExecTime.cc @@ -135,7 +135,7 @@ void JSON::printOperation(const std::map<uint32_t, int64_t> &operation_info, stream.seekp(-2, std::ofstream::end); } -void JSON::uploadOperationsExecTime() const +void JSON::storeOperationsExecTime() const { std::ofstream stream(_measurement_file); if (!stream.is_open()) diff --git a/runtime/onert/core/src/exec/JSONExecTime.h b/runtime/onert/core/src/exec/JSONExecTime.h index a64cb3133..8987d723c 100644 --- a/runtime/onert/core/src/exec/JSONExecTime.h +++ b/runtime/onert/core/src/exec/JSONExecTime.h @@ -54,18 +54,16 @@ public: loadOperationsExecTime(); }; /** - * @brief Update _operations_exec_time_file with new data. + * @brief Update _measurement_file with new data. */ - void uploadOperationsExecTime() const; + void storeOperationsExecTime() const; private: ///@brief file containing measurements std::string _measurement_file; std::unordered_map<std::string, const backend::Backend *> _backends; - std::unordered_map< - const backend::Backend *, - std::unordered_map<std::string, std::unordered_map<bool, std::map<uint32_t, int64_t>>>> - &_measurements; + MeasurementData &_measurements; + /** * @brief Helper function for inserting data to OperationExecTimes * @@ -86,7 +84,7 @@ private: void printOperation(const std::map<uint32_t, int64_t> &operation_info, std::ofstream &stream) const; /** - * @brief Parse and load operations_exec_time from _operations_exec_time_file. + * @brief Parse and load _measurements from _measurement_file. 
*/ void loadOperationsExecTime(); }; diff --git a/runtime/onert/core/src/exec/LinearExecutor.cc b/runtime/onert/core/src/exec/LinearExecutor.cc index 6e6ca110f..a6d447312 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.cc +++ b/runtime/onert/core/src/exec/LinearExecutor.cc @@ -39,7 +39,9 @@ char *seq_to_label(const onert::ir::OpSequence *op_seq, const onert::ir::Operati void LinearExecutor::executeImpl() { - _subject.notifyModelBegin(this); + auto profiling_subg_index = _tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); for (auto &&code : _code) { const auto op_seq = code.op_seq; @@ -48,7 +50,7 @@ void LinearExecutor::executeImpl() #ifdef RUY_PROFILER ruy::profiler::ScopeLabel label(seq_to_label(op_seq, _graph.operations())); #endif - _subject.notifyJobBegin(this, op_seq, backend); + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); auto &fn_seq = code.fn_seq; @@ -58,9 +60,9 @@ void LinearExecutor::executeImpl() fn_seq->enableDynamicShapeInferer(handle_dynamic_tensor); fn_seq->run(); - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); } - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); } } // namespace exec diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 22d00ec30..d43c97012 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -27,6 +27,7 @@ #include "compiler/Linear.h" #include "exec/FunctionSequence.h" #include "compiler/CodeMap.h" +#include "util/TracingCtx.h" namespace onert { @@ -47,11 +48,9 @@ public: * @param code_map OpSequence and its code map */ LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, - const std::vector<ir::OpSequenceIndex> &order) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs} + const std::vector<ir::OpSequenceIndex> &order, const util::TracingCtx *tracing_ctx) + : ExecutorBase{std::move(lowered_graph), tensor_regs, tracing_ctx} { for (auto index : order) { diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc index 676bdb5fa..e9e576ce8 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.cc +++ b/runtime/onert/core/src/exec/ParallelExecutor.cc @@ -60,12 +60,10 @@ void ParallelExecutor::notify(uint32_t finished_job_id) } ParallelExecutor::ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, const compiler::TensorRegistries &tensor_regs, - compiler::CodeMap &&code_map) - : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, - std::move(code_map)} + compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx) + : DataflowExecutor{std::move(lowered_graph), tensor_regs, std::move(code_map), tracing_ctx} { VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; } @@ -100,7 +98,10 @@ void ParallelExecutor::executeImpl() VERBOSE(ParallelExecutor) << "INITIAL JOBS : " << _ready_jobs.size() << std::endl; - _subject.notifyModelBegin(this); + auto profiling_subg_index = 
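// NOTE LinearExecutor, DataflowExecutor and ParallelExecutor each repeat the
// notifySubgraphBegin()/notifySubgraphEnd() bracketing (and the per-job Begin/End
// pair) by hand at every call site. A hypothetical RAII guard that would enforce
// the pairing; not part of onert, and the std::function callbacks stand in for
// the ExecutionObservee notifications:
#include <functional>
#include <utility>

class ScopedNotifySketch
{
public:
  ScopedNotifySketch(const std::function<void()> &begin, std::function<void()> end)
    : _end{std::move(end)}
  {
    begin(); // Begin notification fires on scope entry
  }
  ~ScopedNotifySketch() { _end(); } // End notification fires on every exit path

private:
  std::function<void()> _end;
};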
_tracing_ctx->getSubgraphIndex(&_graph); + + _subject.notifySubgraphBegin(profiling_subg_index); + while (true) { std::unique_lock<std::mutex> lock{_mu_jobs}; @@ -126,9 +127,11 @@ void ParallelExecutor::executeImpl() auto op_sequence_index = _job_to_op_seq[job_index]; auto op_seq = &_lowered_graph->op_seqs().at(op_sequence_index); auto backend = _lowered_graph->getLowerInfo()->op_seq.at(op_sequence_index)->backend(); - auto setup = [&, op_seq, backend]() { _subject.notifyJobBegin(this, op_seq, backend); }; + auto setup = [&, op_seq, backend]() { + _subject.notifyJobBegin(this, profiling_subg_index, op_seq, backend); + }; auto teardown = [&, job_index, op_seq, backend]() { - _subject.notifyJobEnd(this, op_seq, backend); + _subject.notifyJobEnd(this, profiling_subg_index, op_seq, backend); notify(job_index); }; @@ -146,7 +149,7 @@ void ParallelExecutor::executeImpl() // Wait for all the jobs done _scheduler->finish(); - _subject.notifyModelEnd(this); + _subject.notifySubgraphEnd(profiling_subg_index); // Reset input info for the next execution _input_info = _initial_input_info; diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 111c20c0c..fd9db42e1 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -28,6 +28,7 @@ #include <memory> #include "exec/DataflowExecutor.h" #include "ParallelScheduler.h" +#include "util/TracingCtx.h" namespace onert { @@ -51,9 +52,8 @@ public: * @param code_map OpSequence and its code map */ ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, - const std::vector<backend::ITensor *> &input_tensors, - const std::vector<backend::ITensor *> &output_tensors, - const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, compiler::CodeMap &&code_map, + const util::TracingCtx *tracing_ctx); void executeImpl() override; diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h index 2e3f3ca54..99d7b3af7 100644 --- a/runtime/onert/core/src/interp/InterpExecutor.h +++ b/runtime/onert/core/src/interp/InterpExecutor.h @@ -58,6 +58,15 @@ public: * @note It should be called after setting input and output buffer */ void execute(const exec::IODescription &desc) final; + void execute(const std::vector<backend::IPortableTensor *> &, + const std::vector<backend::IPortableTensor *> &) final + { + throw std::runtime_error{"Interpreter does not support subgraph calls (control flow ops)"}; + } + const std::vector<backend::controlflow::IOTensor *> &getOutputTensors() const final + { + throw std::runtime_error{"Interpreter does not support this function."}; + } private: const ir::Graph &_graph; diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc index 0473855d9..e1fb767fe 100644 --- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc +++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc @@ -116,7 +116,7 @@ void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer()); nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, - cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr); + cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr); } void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation
&node) diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 9eedcd21a..8e75c4f53 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -42,6 +42,7 @@ size_t sizeOfDataType(DataType data_type) return sizeof(uint8_t); case DataType::QUANT_INT8_SYMM: case DataType::QUANT_INT8_ASYMM: + case DataType::QUANT_INT8_SYMM_PER_CHANNEL: return sizeof(int8_t); case DataType::FLOAT16: return sizeof(float16); diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index eecfe81cc..a8578b4ce 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -72,7 +72,14 @@ OperationDumper::OperationDumper(const std::string &start_msg) VERBOSE(LIR) << start_msg << std::endl; } -void OperationDumper::visit(const ArgMax &node) { dumpBinaryInputOp(node); } +void OperationDumper::visit(const ArgMinMax &node) +{ + std::string min_max = node.param().is_arg_max ? "(Max)" : "(Min)"; + VERBOSE(LIR) << "* " << node.name() << min_max << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMinMax::INPUT) << ") Axis(" + << node.getInputs().at(ArgMinMax::AXIS) << ") " << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} void OperationDumper::visit(const BatchToSpaceND &node) { @@ -159,6 +166,14 @@ void OperationDumper::visit(const ExpandDims &node) dumpUnaryInputOp(node, axis); } +void OperationDumper::visit(const Fill &node) +{ + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(Fill::Input::SHAPE) << ") Value(" + << node.getInputs().at(Fill::Input::VALUE) << ")" << std::endl; + VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; +} + void OperationDumper::visit(const FullyConnected &node) { std::string inputs = @@ -505,7 +520,7 @@ void OperationDumper::visit(const While &node) } VERBOSE(LIR) << " - Inputs : " << "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph (" - << node.param().cond_subg_index << ") Inputs(" << inputs << ")" << std::endl; + << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl; std::string outputs; const auto &output_indices = node.getOutputs(); for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it) diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h index 91642ab13..fe18307b9 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -31,7 +31,7 @@ public: OperationDumper(const std::string &start_msg); public: - void visit(const operation::ArgMax &) override; + void visit(const operation::ArgMinMax &) override; void visit(const operation::BatchToSpaceND &node) override; void visit(const operation::BCQFullyConnected &node) override; void visit(const operation::BinaryArithmetic &node) override; @@ -48,6 +48,7 @@ public: void visit(const operation::ElementwiseUnary &) override; void visit(const operation::EmbeddingLookup &) override; void visit(const operation::ExpandDims &) override; + void visit(const operation::Fill &) override; void visit(const operation::FullyConnected &node) override; void visit(const operation::Gather &) override; void visit(const operation::HashtableLookup &) override; diff --git a/runtime/onert/core/src/ir/OperationValidator.cc 
b/runtime/onert/core/src/ir/OperationValidator.cc index da08e81fc..6f81c2a56 100644 --- a/runtime/onert/core/src/ir/OperationValidator.cc +++ b/runtime/onert/core/src/ir/OperationValidator.cc @@ -55,6 +55,17 @@ bool OperationValidator::isSameType(const OperandIndex &idx1, const OperandIndex return operandType(idx1) == operandType(idx2); } +bool OperationValidator::isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2) +{ + if (_operands.at(idx1).typeInfo().scale() != _operands.at(idx2).typeInfo().scale()) + return false; + + if (_operands.at(idx1).typeInfo().offset() != _operands.at(idx2).typeInfo().offset()) + return false; + + return true; +} + bool OperationValidator::isValidType(const OperandIndex &idx, const DataType &type) { return operandType(idx) == type; @@ -76,29 +87,54 @@ bool OperationValidator::isValidType(const OperandIndex &idx, void OperationValidator::visit(const operation::AddN &node) { + const auto output_index(node.getOutputs().at(0)); + int size = node.getInputs().size(); for (int i = 0; i < size; i++) { const auto input_index(node.getInputs().at(i)); OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32})); + OP_REQUIRES(isSameType(input_index, output_index)); } } +void OperationValidator::visit(const operation::ArgMinMax &node) +{ + const auto input_index(node.getInputs().at(operation::ArgMinMax::Input::INPUT)); + const auto axis_index(node.getInputs().at(operation::ArgMinMax::Input::AXIS)); + const auto output_index(node.getOutputs().at(0)); + const auto output_type = node.param().output_type; + + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::UINT8, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, output_type)); +} + void OperationValidator::visit(const operation::BatchMatMul &node) { const auto lhs_index(node.getInputs().at(operation::BatchMatMul::Input::LHS)); const auto rhs_index(node.getInputs().at(operation::BatchMatMul::Input::RHS)); + const auto output_index(node.getOutputs().at(0)); // Constant lhs and rhs is not implemented yet OP_REQUIRES(!isConstant(lhs_index) && !isConstant(rhs_index)); + + // Allow hybrid quantization (lhs: float / rhs: qint8 / out: float) + OP_REQUIRES(isValidType(lhs_index, {DataType::FLOAT32, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(lhs_index, rhs_index) || + ((operandType(lhs_index) == DataType::FLOAT32) && + (operandType(rhs_index) == DataType::QUANT_INT8_ASYMM))); + OP_REQUIRES(isSameType(lhs_index, output_index)); } void OperationValidator::visit(const operation::BatchToSpaceND &node) { - const auto block_size_index{node.getInputs().at(operation::BatchToSpaceND::Input::BLOCK_SIZE)}; + const auto input_index{node.getInputs().at(operation::BatchToSpaceND::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; - // Non-constant block_size is not implemented yet - OP_REQUIRES(isConstant(block_size_index)); + OP_REQUIRES(isSameType(input_index, output_index)); } void OperationValidator::visit(const operation::BinaryArithmetic &node) @@ -122,10 +158,48 @@ void OperationValidator::visit(const operation::Comparison &node) OP_REQUIRES(isValidType(output_index, DataType::BOOL8)); } +void OperationValidator::visit(const operation::Concat &node) +{ + const auto output_index{node.getOutputs().at(0)}; + + for (auto input_index : 
node.getInputs()) + { + OP_REQUIRES(isSameType(input_index, output_index)); + + // Int8 quantization requires same scale and zero point + if (isValidType(output_index, DataType::QUANT_INT8_ASYMM)) + { + OP_REQUIRES(isSameQuantParam(input_index, output_index)); + } + } +} + +void OperationValidator::visit(const operation::Conv2D &node) +{ + const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + + uint32_t stride_horizontal = node.param().stride.horizontal; + uint32_t stride_vertical = node.param().stride.vertical; + uint32_t dilation_width = node.param().dilation.width_factor; + uint32_t dilation_height = node.param().dilation.height_factor; + + OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0)); + OP_REQUIRES((dilation_width > 0) && (dilation_height > 0)); + OP_REQUIRES(isSameType(input_index, output_index)); +} + void OperationValidator::visit(const operation::DepthToSpace &node) { + const auto input_index{node.getInputs().at(operation::DepthToSpace::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + int32_t block_size = node.param().block_size; + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::INT32, DataType::INT64, + DataType::QUANT_UINT8_ASYMM, DataType::QUANT_INT8_ASYMM})); + OP_REQUIRES(isSameType(input_index, output_index)); + OP_REQUIRES(block_size > 0); } @@ -151,6 +225,32 @@ void OperationValidator::visit(const operation::ElementwiseActivation &node) // Check if I/O types match OP_REQUIRES(isSameType(output_index, input_index)); + + switch (node.param().op_type) + { + case operation::ElementwiseActivation::Type::ELU: + OP_REQUIRES(isValidType(input_index, DataType::FLOAT32)); + break; + case operation::ElementwiseActivation::Type::LEAKY_RELU: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::LOGISTIC: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + case operation::ElementwiseActivation::Type::RELU: + OP_REQUIRES(isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM})); + break; + case operation::ElementwiseActivation::Type::TANH: + OP_REQUIRES( + isValidType(input_index, {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, + DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT16_ASYMM})); + break; + } } void OperationValidator::visit(const operation::ElementwiseBinary &node) @@ -161,6 +261,13 @@ void OperationValidator::visit(const operation::ElementwiseBinary &node) OP_REQUIRES(isSameType(lhs_index, rhs_index)); OP_REQUIRES(isSameType(lhs_index, output_index)); + + const auto op_type = node.param().op_type; + if (op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND || + op_type == operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR) + { + OP_REQUIRES(isValidType(lhs_index, DataType::BOOL8)); + } } void OperationValidator::visit(const operation::ElementwiseUnary &node) @@ -195,8 +302,17 @@ void OperationValidator::visit(const operation::ElementwiseUnary &node) void OperationValidator::visit(const operation::EmbeddingLookup &node) { const auto lookups_index{node.getInputs().at(operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(operation::EmbeddingLookup::Input::VALUES)}; + 
const auto output_index{node.getOutputs().at(0)}; OP_REQUIRES(isValidType(lookups_index, DataType::INT32)); + + // TFLite: Allow hybrid type - value table & output + // NNAPI: Require same value table and output type + OP_REQUIRES( + isSameType(values_index, output_index) || + (isValidType(output_index, DataType::FLOAT32) && + (isValidType(values_index, {DataType::QUANT_INT8_ASYMM, DataType::QUANT_INT8_SYMM})))); } void OperationValidator::visit(const operation::ExpandDims &node) @@ -206,7 +322,19 @@ void OperationValidator::visit(const operation::ExpandDims &node) const auto axis_index{node.getInputs().at(operation::ExpandDims::Input::AXIS)}; OP_REQUIRES(isSameType(output_index, input_index)); - OP_REQUIRES(isValidType(axis_index, DataType::INT32)); + OP_REQUIRES(isValidType(axis_index, {DataType::INT32, DataType::INT64})); +} + +void OperationValidator::visit(const operation::Fill &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(operation::Fill::Input::SHAPE)}; + const auto value_index{node.getInputs().at(operation::Fill::Input::VALUE)}; + + OP_REQUIRES(isSameType(output_index, value_index)); + OP_REQUIRES(isValidType(input_index, {DataType::INT32, DataType::INT64})); + OP_REQUIRES(isValidType(output_index, + {DataType::FLOAT32, DataType::INT32, DataType::INT64, DataType::BOOL8})); } void OperationValidator::visit(const operation::HashtableLookup &node) diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h index 2ea8000e5..5b95b16ba 100644 --- a/runtime/onert/core/src/ir/OperationValidator.h +++ b/runtime/onert/core/src/ir/OperationValidator.h @@ -44,10 +44,13 @@ public: public: void visit(const operation::AddN &node) override; + void visit(const operation::ArgMinMax &node) override; void visit(const operation::BatchMatMul &node) override; void visit(const operation::BatchToSpaceND &node) override; void visit(const operation::BinaryArithmetic &node) override; void visit(const operation::Comparison &node) override; + void visit(const operation::Concat &node) override; + void visit(const operation::Conv2D &node) override; void visit(const operation::DepthToSpace &node) override; void visit(const operation::DepthwiseConv2D &node) override; void visit(const operation::ElementwiseActivation &node) override; @@ -55,6 +58,7 @@ public: void visit(const operation::ElementwiseUnary &node) override; void visit(const operation::EmbeddingLookup &node) override; void visit(const operation::ExpandDims &node) override; + void visit(const operation::Fill &node) override; void visit(const operation::HashtableLookup &node) override; void visit(const operation::Pack &node) override; void visit(const operation::Pad &node) override; @@ -76,6 +80,7 @@ private: DataType operandType(const OperandIndex &idx); bool isConstant(const OperandIndex &idx); bool isSameType(const OperandIndex &idx1, const OperandIndex &idx2); + bool isSameQuantParam(const OperandIndex &idx1, const OperandIndex &idx2); bool isValidType(const OperandIndex &idx, const DataType &type); bool isValidType(const OperandIndex &idx, std::initializer_list<DataType> valid_types); diff --git a/runtime/onert/core/src/ir/operation/ArgMax.cc b/runtime/onert/core/src/ir/operation/ArgMinMax.cc index f3bd8fd73..989d905bf 100644 --- a/runtime/onert/core/src/ir/operation/ArgMax.cc +++ b/runtime/onert/core/src/ir/operation/ArgMinMax.cc @@ -14,10 +14,7 @@ * limitations under the License. 
*/ -#include "ir/operation/ArgMax.h" - -#include <cassert> - +#include "ir/operation/ArgMinMax.h" #include "ir/OperationVisitor.h" namespace onert { @@ -27,10 +24,10 @@ namespace ir namespace operation { -void ArgMax::accept(OperationVisitor &v) const { v.visit(*this); } +void ArgMinMax::accept(OperationVisitor &v) const { v.visit(*this); } -ArgMax::ArgMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) +ArgMinMax::ArgMinMax(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc index 6a0be7eb8..20b6fa124 100644 --- a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc +++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc @@ -57,7 +57,7 @@ std::string ElementwiseUnary::name() const {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, {ElementwiseUnaryType::SIN, std::string{"Sin"}}, {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, - {ElementwiseUnaryType::SQURE, std::string{"Squre"}}, + {ElementwiseUnaryType::SQUARE, std::string{"Square"}}, {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; return name_map.at(_param.op_type); } diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc index 45cce662e..9da93f68a 100644 --- a/runtime/onert/core/src/util/ConfigSource.cc +++ b/runtime/onert/core/src/util/ConfigSource.cc @@ -30,8 +30,10 @@ namespace util { static std::unique_ptr<IConfigSource> _source; +static std::unique_ptr<IConfigSource> _source_ext; void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); } +void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); } static IConfigSource *config_source() { @@ -67,6 +69,15 @@ static std::string getConfigOrDefault(const std::string &key) auto ret = config_source()->get(key); if (ret.empty()) { + // if env is not set, search from external + if (_source_ext.get()) + { + ret = _source_ext.get()->get(key); + } + } + // if not found search from defaults + if (ret.empty()) + { auto itr = defaults.find(key); if (itr != defaults.end()) { diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc index de37276bf..fd5618714 100644 --- a/runtime/onert/core/src/util/EventCollector.cc +++ b/runtime/onert/core/src/util/EventCollector.cc @@ -38,15 +38,17 @@ class DurationEventBuilder public: DurationEventBuilder(const std::string &ts) : _ts{ts} {} - DurationEvent build(const std::string &tid, const std::string &name, const std::string &ph) const + DurationEvent build(const EventCollector::Event &evt_collected, const std::string &ph) const { DurationEvent evt; - evt.name = name; - evt.tid = tid; + evt.name = evt_collected.label; + evt.tid = evt_collected.backend; evt.ph = ph; evt.ts = _ts; + evt.args = evt_collected.userData; + return evt; } @@ -93,11 +95,11 @@ void EventCollector::onEvent(const Event &event) switch (event.edge) { case Edge::BEGIN: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "B")); + _rec->emit(DurationEventBuilder(ts).build(event, "B")); break; case Edge::END: - _rec->emit(DurationEventBuilder(ts).build(event.backend, event.label, "E")); + _rec->emit(DurationEventBuilder(ts).build(event, "E")); break; }
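
The ConfigSource hunk above adds a second, externally registered source: a key is resolved first from the primary source (typically environment-backed), then from the source registered via config_source_ext(), and only then from the compiled-in defaults. A minimal sketch of that three-step lookup under simplified assumptions (plain maps stand in for the IConfigSource objects; lookupConfig is a hypothetical name):

    #include <map>
    #include <string>

    // Hypothetical stand-ins for the real IConfigSource chain; the point is
    // the lookup order: primary (env) -> external (config_source_ext) -> defaults.
    std::string lookupConfig(const std::map<std::string, std::string> &primary,
                             const std::map<std::string, std::string> &external,
                             const std::map<std::string, std::string> &defaults,
                             const std::string &key)
    {
      for (const auto *src : {&primary, &external, &defaults})
      {
        auto it = src->find(key);
        if (it != src->end() && !it->second.empty())
          return it->second;
      }
      return ""; // unknown key: same as an empty config value
    }

diff --git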
a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h index 8154be592..7daa4851f 100644 --- a/runtime/onert/core/src/util/EventCollector.h +++ b/runtime/onert/core/src/util/EventCollector.h @@ -19,6 +19,10 @@ #include "util/EventRecorder.h" +#include <vector> +#include <utility> +#include <string> + class EventCollector { public: @@ -31,8 +35,24 @@ public: struct Event { Edge edge; + uint32_t session_index; + uint32_t subg_index; std::string backend; + uint32_t op_index; + std::string op_name; + uint32_t op_seq_size; // if this event is for an operation sequence of multiple operations + + // TODO Deprecate this. The label can differ by writer, so let the writer decide the label. std::string label; + + // user-defined data: pairs of (key, value) + std::vector<std::pair<std::string, std::string>> userData; + + Event(Edge a_edge, const std::string &a_backend, const std::string &a_label) + : edge(a_edge), session_index(0), subg_index(0), backend(a_backend), op_index(0), + op_seq_size(0), label(a_label) + { /* empty */ + } }; public:
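
The widened Event struct above carries per-event identifiers plus free-form user data, and DurationEventBuilder (earlier in this patch) copies that userData into the args of the emitted trace event. A small sketch of populating it, assuming the declarations above and an EventCollector already wired to a recorder (field values are illustrative):

    // Sketch only: construct a BEGIN event with the new fields and attach
    // user-defined (key, value) pairs that end up in the trace output.
    void emitAnnotatedEvent(EventCollector &collector)
    {
      EventCollector::Event evt{EventCollector::Edge::BEGIN, "cpu", "Conv2D"};
      evt.subg_index = 0;                        // which subgraph is running
      evt.op_index = 7;                          // hypothetical operation index
      evt.op_name = "Conv2D";
      evt.userData.emplace_back("session", "0"); // shows up in DurationEvent::args
      evt.userData.emplace_back("graph", "subg-0");
      collector.onEvent(evt);
    }

diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.cc b/runtime/onert/core/src/util/EventCollectorGlobal.cc deleted file mode 100644 index 6c03a5b9a..000000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.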
- */ - -#include "util/EventCollectorGlobal.h" - -#include <cassert> -#include <fstream> -#include <iostream> - -#include "util/ConfigSource.h" -#include "util/EventWriter.h" - -namespace onert -{ -namespace util -{ - -EventCollectorGlobal::EventCollectorGlobal() : _recorder{}, _collector{&_recorder} -{ - // DO NOTHING -} - -EventCollectorGlobal::~EventCollectorGlobal() -{ - if (!_recorder.empty()) - { - try - { - // TODO Need better way for saved file path than the hardcoded path - EventWriter{_recorder}.writeToFile("trace.global.json", - EventWriter::WriteFormat::CHROME_TRACING); - } - catch (const std::exception &e) - { - std::cerr << "E: Fail to record event in EventCollectorGlobal: " << e.what() << std::endl; - } - } -} - -EventCollectorGlobal &EventCollectorGlobal::get() -{ - static EventCollectorGlobal instance; - return instance; -} - -EventDurationBlock::EventDurationBlock(const std::string &tag) : _tag{tag} -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} -EventDurationBlock::~EventDurationBlock() -{ - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -EventDurationManual::EventDurationManual(const std::string &tag) : _tag{tag}, _pair{true} {} - -EventDurationManual::~EventDurationManual() -{ - // Check if it has called begin-end pair - assert(_pair); -} - -void EventDurationManual::begin() -{ - _pair = false; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::BEGIN, "0", _tag}); -} - -void EventDurationManual::end() -{ - assert(!_pair); - _pair = true; - auto &glob = EventCollectorGlobal::get(); - glob.collector().onEvent(EventCollector::Event{EventCollector::Edge::END, "0", _tag}); -} - -} // namespace util -} // namespace onert diff --git a/runtime/onert/core/src/util/EventCollectorGlobal.h b/runtime/onert/core/src/util/EventCollectorGlobal.h deleted file mode 100644 index 1027ec84d..000000000 --- a/runtime/onert/core/src/util/EventCollectorGlobal.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ -#define __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ - -#include "util/EventRecorder.h" -#include "util/EventCollector.h" - -namespace onert -{ -namespace util -{ - -/** - * @brief Singleton class for event collection from anywhere in code - * - */ -class EventCollectorGlobal -{ -public: - /** - * @brief Get the singleton object of this class - * - * @return EventCollectorGlobal& Singleton object - */ - static EventCollectorGlobal &get(); - -public: - /** - * @brief Getter for event collector object - * - * @return EventCollector& Collector object - */ - EventCollector &collector() { return _collector; } - -private: - EventCollectorGlobal(); - ~EventCollectorGlobal(); - -private: - EventRecorder _recorder; - EventCollector _collector; -}; - -/** - * @brief Helper class for emitting duration event which is handled automatically with ctor/dtor - * - */ -class EventDurationBlock -{ -public: - /** - * @brief Raise a duration event with type of BEGIN - * - * @param tag A label for the duration event - */ - EventDurationBlock(const std::string &tag); - /** - * @brief Raise a duration event with type of END - * - */ - ~EventDurationBlock(); - -private: - std::string _tag; -}; - -/** - * @brief Helper class for emitting duration event which is handled manually - * - * Usage: - * { - * ... - * EventDurationManual duration("some tag"); - * duration.begin(); - * ... - * ... // Code for duration - * ... - * duration.end(); - * } - * - */ -class EventDurationManual -{ -public: - /** - * @brief Construct a new Event Duration Manual object - * - * @param tag A label for the duration object - */ - EventDurationManual(const std::string &tag); - /** - * @brief Destroy the Event Duration Manual object - * - */ - ~EventDurationManual(); - - /** - * @brief Raise a duration event with type of BEGIN - * - */ - void begin(); - /** - * @brief Raise a duration event with type of END - * - */ - void end(); - -private: - std::string _tag; - bool _pair; -}; - -} // namespace util -} // namespace onert - -/** - * Helper Macro Definitions - * - * HOW TO USE - * - * void f(args) - * { - * EVENT_DURATION_FUNCTION(); - * ... - * if(cond) - * { - * EVENT_DURATION_REGION("if branch"); - * ... - * } - * ... 
- * } - */ - -#define EVENT_DURATION_FUNCTION() \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { __FUNCTION__ } - -#define EVENT_DURATION_REGION(tag) \ - ::onert::util::EventDurationBlock __event_duration__##__LINE__ { tag } - -#endif // __ONERT_UTIL_EVENT_COLLECTOR_GLOBAL_H__ diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h index 7af4c7ddb..3ed40875f 100644 --- a/runtime/onert/core/src/util/EventRecorder.h +++ b/runtime/onert/core/src/util/EventRecorder.h @@ -27,8 +27,9 @@ struct Event { std::string name; std::string tid; - std::string ph; /* REQUIRED */ - std::string ts; /* REQUIRED */ + std::string ph; /* REQUIRED */ + std::string ts; /* REQUIRED */ + std::vector<std::pair<std::string, std::string>> args; // user-defined data: pairs of (key, value) }; struct DurationEvent : public Event diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc index dacb40e64..8760a16db 100644 --- a/runtime/onert/core/src/util/EventWriter.cc +++ b/runtime/onert/core/src/util/EventWriter.cc @@ -89,6 +89,7 @@ void fill(Content &content, const Event &evt) content.flds.emplace_back("tid", evt.tid); content.flds.emplace_back("ph", evt.ph); content.flds.emplace_back("ts", evt.ts); + content.args = evt.args; } std::string object(const DurationEvent &evt) @@ -418,40 +419,7 @@ struct MDTableBuilder } // namespace -EventWriter::EventWriter(const EventRecorder &recorder) : _recorder(recorder) -{ - // DO NOTHING -} - -void EventWriter::writeToFiles(const std::string &base_filepath) -{ - // Note. According to an internal issue, let snpe json as just file name not '.snpe.json' - writeToFile(base_filepath, WriteFormat::SNPE_BENCHMARK); - writeToFile(base_filepath + ".chrome.json", WriteFormat::CHROME_TRACING); - writeToFile(base_filepath + ".table.md", WriteFormat::MD_TABLE); -} - -void EventWriter::writeToFile(const std::string &filepath, WriteFormat write_format) -{ - std::ofstream os{filepath, std::ofstream::out}; - switch (write_format) - { - case WriteFormat::CHROME_TRACING: - writeChromeTrace(os); - break; - case WriteFormat::SNPE_BENCHMARK: - writeSNPEBenchmark(os); - break; - case WriteFormat::MD_TABLE: - writeMDTable(os); - break; - default: - assert(!"Invalid value"); - break; - } -} - -void EventWriter::writeSNPEBenchmark(std::ostream &os) +void SNPEWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders) { Json::Value root; auto &exec_data = root["Execution_Data"] = Json::Value{Json::objectValue}; @@ -475,11 +443,14 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) // Memory { std::unordered_map<std::string, Stat> mem_stats; - for (auto &evt : _recorder.counter_events()) + for (auto &recorder : recorders) { - auto &mem_stat = mem_stats[evt.name]; - uint64_t val = std::stoull(evt.values.at("value")); - mem_stat.accumulate(val); + for (auto &evt : recorder->counter_events()) + { + auto &mem_stat = mem_stats[evt.name]; + uint64_t val = std::stoull(evt.values.at("value")); + mem_stat.accumulate(val); + } } auto &mem = exec_data["memory"] = Json::Value{Json::objectValue}; @@ -501,26 +472,29 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) // 2D keys : stats[tid][name] std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats; std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps; - for (auto &evt : _recorder.duration_events()) + for (auto &recorder : recorders) { - auto &stat = stats[evt.tid][evt.name]; - 
auto &begin_ts = begin_timestamps[evt.tid][evt.name]; - uint64_t timestamp = std::stoull(evt.ts); - if (evt.ph == "B") + for (auto &evt : recorder->duration_events()) { - if (begin_ts != 0) - throw std::runtime_error{"Invalid Data"}; - begin_ts = timestamp; - } - else if (evt.ph == "E") - { - if (begin_ts == 0 || timestamp < begin_ts) - throw std::runtime_error{"Invalid Data"}; - stat.accumulate(timestamp - begin_ts); - begin_ts = 0; + auto &stat = stats[evt.tid][evt.name]; + auto &begin_ts = begin_timestamps[evt.tid][evt.name]; + uint64_t timestamp = std::stoull(evt.ts); + if (evt.ph == "B") + { + if (begin_ts != 0) + throw std::runtime_error{"Invalid Data"}; + begin_ts = timestamp; + } + else if (evt.ph == "E") + { + if (begin_ts == 0 || timestamp < begin_ts) + throw std::runtime_error{"Invalid Data"}; + stat.accumulate(timestamp - begin_ts); + begin_ts = 0; + } + else + throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; } - else - throw std::runtime_error{"Invalid Data - invalid value for \"ph\" : \"" + evt.ph + "\""}; } for (auto &kv : begin_timestamps) @@ -545,30 +519,71 @@ void EventWriter::writeSNPEBenchmark(std::ostream &os) } } - os << root; + _os << root; } -void EventWriter::writeChromeTrace(std::ostream &os) +void ChromeTracingWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &recorders) { - os << "{\n"; - os << " " << quote("traceEvents") << ": [\n"; + _os << "{\n"; + _os << " " << quote("traceEvents") << ": [\n"; - for (auto &evt : _recorder.duration_events()) + for (auto &recorder : recorders) { - os << " " << object(evt) << ",\n"; + flushOneRecord(*recorder); } - for (auto &evt : _recorder.counter_events()) + _os << " { }\n"; + _os << " ]\n"; + _os << "}\n"; +} + +void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder) +{ + for (auto &evt : recorder.duration_events()) { - os << " " << object(evt) << ",\n"; + _os << " " << object(evt) << ",\n"; } - os << " { }\n"; - os << " ]\n"; - os << "}\n"; + for (auto &evt : recorder.counter_events()) + { + _os << " " << object(evt) << ",\n"; + } } -void EventWriter::writeMDTable(std::ostream &os) +void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records) +{ + for (auto &recorder : records) + { + MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os); + } +} + +// initialization +std::mutex EventWriter::_mutex; + +void EventWriter::readyToFlush(std::unique_ptr<EventRecorder> &&recorder) { - MDTableBuilder(_recorder.duration_events(), _recorder.counter_events()).build().write(os); + { + std::unique_lock<std::mutex> lock{_mutex}; + + _recorders.emplace_back(std::move(recorder)); + + if (--_ref_count > 0) + return; + } + // The caller of this method is the last instance that uses EventWriter. + // Let's write log files. + + // Note. 
According to an internal issue, the SNPE JSON output uses just the base file name, not '.snpe.json' + flush(WriteFormat::SNPE_BENCHMARK); + flush(WriteFormat::CHROME_TRACING); + flush(WriteFormat::MD_TABLE); +} + +void EventWriter::flush(WriteFormat write_format) +{ + auto *writer = _actual_writers[write_format].get(); + assert(writer); + + writer->flush(_recorders); } diff --git a/runtime/onert/core/src/util/EventWriter.h b/runtime/onert/core/src/util/EventWriter.h index 7e838ca82..0dcd00be6 100644 --- a/runtime/onert/core/src/util/EventWriter.h +++ b/runtime/onert/core/src/util/EventWriter.h @@ -20,7 +20,49 @@ #include "EventRecorder.h" #include <string> -#include <ostream> +#include <vector> +#include <unordered_map> +#include <mutex> +#include <fstream> + +class EventFormatWriter +{ +public: + EventFormatWriter(const std::string &filepath) : _os{filepath, std::ofstream::out} {} + virtual ~EventFormatWriter() { /* empty */} + + virtual void flush(const std::vector<std::unique_ptr<EventRecorder>> &) = 0; + +protected: + std::ofstream _os; +}; + +class SNPEWriter : public EventFormatWriter +{ +public: + SNPEWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; +}; + +class ChromeTracingWriter : public EventFormatWriter +{ +public: + ChromeTracingWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; + +class MDTableWriter : public EventFormatWriter +{ +public: + MDTableWriter(const std::string &filepath) : EventFormatWriter(filepath) { /* empty */} + void flush(const std::vector<std::unique_ptr<EventRecorder>> &) override; + +private: + void flushOneRecord(const EventRecorder &); +}; class EventWriter { @@ -32,20 +74,58 @@ public: MD_TABLE, }; -public: - EventWriter(const EventRecorder &recorder); + /** + * @brief Returns the singleton object + */ + static EventWriter *get(const std::string &filename) + { + std::unique_lock<std::mutex> lock{_mutex}; -public: - void writeToFiles(const std::string &base_filepath); - void writeToFile(const std::string &filepath, WriteFormat write_format); + static EventWriter singleton(filename); + return &singleton; + } + + /** + * @brief Call this when an observer that uses EventWriter starts + */ + void startToUse() + { + std::unique_lock<std::mutex> lock{_mutex}; + _ref_count++; + } + + /** + * @brief Call this when an observer that uses EventWriter finishes. + * After all such observers have called this method, the reference count reaches 0. + * Then, EventWriter will write the profiling result files.
+ */ + void readyToFlush(std::unique_ptr<EventRecorder> &&recorder); private: - void writeSNPEBenchmark(std::ostream &os); - void writeChromeTrace(std::ostream &os); - void writeMDTable(std::ostream &os); + EventWriter(const std::string &filepath) : _ref_count(0) + { + std::string snpe_log_name(filepath); + std::string chrome_tracing_log_name(filepath + ".chrome.json"); + std::string md_table_log_name(filepath + ".table.md"); + + _actual_writers[WriteFormat::SNPE_BENCHMARK] = std::make_unique<SNPEWriter>(snpe_log_name); + _actual_writers[WriteFormat::CHROME_TRACING] = + std::make_unique<ChromeTracingWriter>(chrome_tracing_log_name); + _actual_writers[WriteFormat::MD_TABLE] = std::make_unique<MDTableWriter>(md_table_log_name); + }; + + void flush(WriteFormat write_format); private: - const EventRecorder &_recorder; + static std::mutex _mutex; + + // number of observers of an executor that want to write profiling data + int32_t _ref_count; + + // one recorder object per executor + std::vector<std::unique_ptr<EventRecorder>> _recorders; + + std::unordered_map<WriteFormat, std::unique_ptr<EventFormatWriter>> _actual_writers; }; #endif // __ONERT_UTIL_EVENT_WRITER_H__ diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc index 1f468a8b5..3ed3080cc 100644 --- a/runtime/onert/core/src/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.cc @@ -128,11 +128,11 @@ ir::Shape inferEltwiseShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_sha return broadcastShapes(lhs_shape, rhs_shape); } -ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank) +ir::Shape inferArgMinMaxShape(const ir::Shape &input_shape, int axis, int rank) { if (axis < 0 || axis >= rank) { - throw std::runtime_error("ArgMax shape inference: Wrong axis value " + std::to_string(axis)); + throw std::runtime_error("ArgMinMax shape inference: Wrong axis value " + std::to_string(axis)); } ir::Shape out_shape; @@ -385,18 +385,22 @@ ir::Shape inferExpandDimsShape(const ir::Shape &in_shape, int32_t axis) return out_shape; } -ir::Shape inferFillShape(const ir::Shape &in_shape, const int32_t *in_buf) +template <typename T> ir::Shape inferFillShape(const ir::Shape &fill_shape, const T *shape_buf) { - ir::Shape out_shape(in_shape.dim(0)); + ir::Shape out_shape(fill_shape.dim(0)); for (int out_x = 0; out_x < out_shape.rank(); ++out_x) { - out_shape.dim(out_x) = in_buf[out_x]; + out_shape.dim(out_x) = static_cast<int32_t>(shape_buf[out_x]); } return out_shape; } +// template instantiation +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int32_t *shape_buf); +template ir::Shape inferFillShape(const ir::Shape &fill_shape, const int64_t *shape_buf); + ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &ker_shape) { assert(in_shape.rank() >= 2);
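
With the pieces above, EventWriter becomes a process-wide, reference-counted sink: each observer announces itself with startToUse() and later hands over its recorder with readyToFlush(); the final hand-off drives all three format writers. A minimal usage sketch under that reading (the helper function name is hypothetical; note that get() fixes the file path on its first call):

    // Sketch of the intended lifecycle; assumes the EventWriter/EventRecorder
    // declarations above plus <memory> and <utility>.
    void profileWithTwoObservers()
    {
      auto *writer = EventWriter::get("trace"); // singleton; later calls ignore the argument
      writer->startToUse();                     // observer #1 (_ref_count = 1)
      writer->startToUse();                     // observer #2 (_ref_count = 2)

      auto rec1 = std::make_unique<EventRecorder>();
      auto rec2 = std::make_unique<EventRecorder>();
      // ... each observer fills its own recorder while its executor runs ...

      writer->readyToFlush(std::move(rec1));    // _ref_count 2 -> 1, nothing written yet
      writer->readyToFlush(std::move(rec2));    // _ref_count 1 -> 0, files are written
    }

diff --git a/runtime/onert/core/src/util/TracingCtx.cc b/runtime/onert/core/src/util/TracingCtx.cc new file mode 100644 index 000000000..08a1b32a7 --- /dev/null +++ b/runtime/onert/core/src/util/TracingCtx.cc @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.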
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/TracingCtx.h" + +namespace onert +{ +namespace util +{ + +// initializing static member var +std::mutex TracingCtx::_session_id_mutex; + +} // namespace util +} // namespace onert diff --git a/runtime/onert/frontend/.clang-format b/runtime/onert/frontend/.clang-format new file mode 120000 index 000000000..83185fee3 --- /dev/null +++ b/runtime/onert/frontend/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8
\ No newline at end of file diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index c0003e402..f9c97b41b 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -68,7 +68,7 @@ public: * @param graph reference on subgraphs */ explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs) - : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr} + : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr} { _use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA); } @@ -114,23 +114,19 @@ private: // Operations template <typename OpIR, typename... Args> const OpIR *loadOperationTo(const Operator *op, ir::Graph &subg, Args &&... args); - void loadConv2D(const Operator *op, ir::Graph &subg); - void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); - void loadTransposeConv(const Operator *op, ir::Graph &subg); - void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); - void loadReshape(const Operator *op, ir::Graph &subg); - void loadSoftmax(const Operator *op, ir::Graph &subg); - void loadConcatenation(const Operator *op, ir::Graph &subg); - void loadFC(const Operator *op, ir::Graph &subg); + + void loadAddV2(const Operator *op, ir::Graph &subg); + void loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax); + void loadBatchMatMul(const Operator *op, ir::Graph &subg); void loadBinaryArithmetic(const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type); - void loadAddV2(const Operator *op, ir::Graph &subg); - void loadPack(const Operator *op, ir::Graph &subg); - void loadResizeBilinear(const Operator *op, ir::Graph &subg); - void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); - void loadReduce(const Operator *op, ir::Graph &subg, - ir::operation::Reduce::ReduceType reduce_type); - void loadReduceAll(const Operator *op, ir::Graph &subg); + void loadComparison(const Operator *op, ir::Graph &subg); + void loadConcatenation(const Operator *op, ir::Graph &subg); + void loadConv2D(const Operator *op, ir::Graph &subg); + void loadCustom(const Operator *op, ir::Graph &subg); + void loadDepthToSpace(const Operator *op, ir::Graph &subg); + void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); + void loadEinsum(const Operator *op, ir::Graph &subg); void loadElementwiseActivation(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, float alpha = 0.f, float beta = 0.f); @@ -138,25 +134,31 @@ private: ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type); void loadElementwiseUnary(const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type); + void loadFC(const Operator *op, ir::Graph &subg); + void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); void loadGather(const Operator *op, ir::Graph &subg); - void loadCustom(const Operator *op, ir::Graph &subg); - void loadBatchMatMul(const Operator *op, ir::Graph &subg); - void loadSqueeze(const Operator *op, ir::Graph &subg); + void loadIf(const Operator *op, ir::Graph &subg); + void loadLeakyRelu(const Operator *op, ir::Graph &subg); + void loadLogSoftmax(const Operator *op, ir::Graph &subg); + void loadOneHot(const Operator *op, ir::Graph &subg); + void loadPack(const Operator *op, ir::Graph &subg); + void loadPool2D(const Operator *op, 
ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); + void loadReduce(const Operator *op, ir::Graph &subg, + ir::operation::Reduce::ReduceType reduce_type); + void loadReduceAll(const Operator *op, ir::Graph &subg); + void loadReshape(const Operator *op, ir::Graph &subg); + void loadResizeBilinear(const Operator *op, ir::Graph &subg); + void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); + void loadSoftmax(const Operator *op, ir::Graph &subg); + void loadSpaceToDepth(const Operator *op, ir::Graph &subg); void loadSplit(const Operator *op, ir::Graph &subg); void loadSplitV(const Operator *op, ir::Graph &subg); + void loadSqueeze(const Operator *op, ir::Graph &subg); void loadStridedSlice(const Operator *op, ir::Graph &subg); + void loadTransposeConv(const Operator *op, ir::Graph &subg); + void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg); void loadUnpack(const Operator *op, ir::Graph &subg); - void loadComparison(const Operator *op, ir::Graph &subg); - void loadEinsum(const Operator *op, ir::Graph &subg); - void loadOneHot(const Operator *op, ir::Graph &subg); - void loadIf(const Operator *op, ir::Graph &subg); void loadWhile(const Operator *op, ir::Graph &subg); - void loadArgMax(const Operator *op, ir::Graph &subg); - void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); - void loadLogSoftmax(const Operator *op, ir::Graph &subg); - void loadSpaceToDepth(const Operator *op, ir::Graph &subg); - void loadLeakyRelu(const Operator *op, ir::Graph &subg); - void loadUnidirectionalSequenceLSTM(const Operator *op, ir::Graph &subg); void verifySubgraphIndex(int subg_index) { @@ -255,19 +257,26 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te { case TensorType::TensorType_FLOAT32: return ir::DataType::FLOAT32; + case TensorType::TensorType_FLOAT16: + return ir::DataType::FLOAT16; case TensorType::TensorType_INT32: return ir::DataType::INT32; - case TensorType::TensorType_BOOL: - return ir::DataType::BOOL8; case TensorType::TensorType_UINT8: return ir::DataType::QUANT_UINT8_ASYMM; - case TensorType::TensorType_INT8: - return ir::DataType::QUANT_INT8_ASYMM; case TensorType::TensorType_INT64: return ir::DataType::INT64; + // case TensorType::TensorType_STRING: + case TensorType::TensorType_BOOL: + return ir::DataType::BOOL8; + case TensorType::TensorType_INT16: + return ir::DataType::QUANT_INT16_ASYMM; + // case TensorType::TensorType_COMPLEX64 + case TensorType::TensorType_INT8: + return ir::DataType::QUANT_INT8_ASYMM; + // case TensorType::TensorType_FLOAT64 default: throw std::runtime_error( - std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); + std::string("Unsupported tensor type: ").append(EnumNameTensorType(type))); } } @@ -385,7 +394,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir: { size_t offset = unaligned_offset_start - aligned_offset_start; uint8_t *mmap_base = static_cast<uint8_t *>( - mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start)); + mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE, _fd, aligned_offset_start)); data_obj = std::make_unique<ir::CachedData>(mmap_base + offset, data_size); munmap(mmap_base, mmap_size); } @@ -446,7 +455,7 @@ void BaseLoader<LoaderDomain>::loadSparsity(const Tensor *tensor, const ir::Shap bool block2D_sparsity = dim_metadata_size == 4 && block_rank == 2; if (dim_metadata_size != !random_sparsity && !block2D_sparsity) throw std::runtime_error( - "sparsity is supported only for 2D tensor 
with random or 16x1 block sparsity."); + "sparsity is supported only for 2D tensor with random or 16x1 block sparsity."); const auto *src_metadata = src_sparsity->dim_metadata()->Get(0); if (src_metadata->format() != DimensionType::DimensionType_DENSE) @@ -514,8 +523,8 @@ void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIn auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code)) throw std::runtime_error( - std::string("loader doesn't support optional input tensor yet for ") - .append(EnumNameBuiltinOperator(builtin_code))); + std::string("loader doesn't support optional input tensor yet for ") + .append(EnumNameBuiltinOperator(builtin_code))); }; check_optional_input(); inputs.append(tensorIdxToOperandIdx(idx)); @@ -691,9 +700,9 @@ void BaseLoader<LoaderDomain>::loadFC(const Operator *op, ir::Graph &subg) const auto fc = loadOperationTo<ir::operation::FullyConnected>(op, subg, param); const auto &input_operand = - subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT)); + subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::INPUT)); auto &weights_operand = - subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT)); + subg.operands().at(fc->getInputs().at(ir::operation::FullyConnected::WEIGHT)); if (input_operand.typeInfo().type() == ir::DataType::FLOAT32 && ((weights_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) || weights_operand.typeInfo().type() == ir::DataType::QUANT_INT8_ASYMM)) @@ -719,7 +728,7 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg) auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); auto attr_map = data_root.AsMap(); const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>( - attr_map["fused_activation_function"].AsInt8()); + attr_map["fused_activation_function"].AsInt8()); param.activation = convertActivation(fused_activation_func); } @@ -727,8 +736,18 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg) } template <typename LoaderDomain> +void BaseLoader<LoaderDomain>::loadDepthToSpace(const Operator *op, ir::Graph &subg) +{ + ir::operation::DepthToSpace::Param param; + const auto *options = op->builtin_options_as_DepthToSpaceOptions(); + param.block_size = options->block_size(); + + loadOperationTo<ir::operation::DepthToSpace>(op, subg, param); +} + +template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadBinaryArithmetic( - const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type) + const Operator *op, ir::Graph &subg, ir::operation::BinaryArithmetic::ArithmeticType op_type) { ir::operation::BinaryArithmetic::Param param; param.arithmetic_type = op_type; @@ -780,8 +799,8 @@ void BaseLoader<LoaderDomain>::loadPack(const Operator *op, ir::Graph &subg) template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadElementwiseActivation( - const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, - float alpha, float beta) + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, + float alpha, float beta) { ir::operation::ElementwiseActivation::Param param; param.op_type = op_type; @@ -844,8 +863,8 @@ void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg template <typename LoaderDomain> void 
BaseLoader<LoaderDomain>::loadElementwiseBinary( - const Operator *op, ir::Graph &subg, - ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) + const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) { ir::operation::ElementwiseBinary::Param param; param.op_type = op_type; @@ -870,7 +889,7 @@ void BaseLoader<LoaderDomain>::loadElementwiseUnary(const Operator *op, ir::Grap } }; qasymm8ToUint8( - subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT))); + subg.operands().at(eu->getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT))); qasymm8ToUint8(subg.operands().at(eu->getOutputs().at(0))); } } @@ -915,8 +934,8 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su break; default: throw std::runtime_error( - std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) + - " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); + std::string("Wrong loaded operation: ").append(EnumNameBuiltinOperator(builtin_op)) + + " as " + EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL)); } loadOperationTo<ir::operation::BatchMatMul>(op, subg, param); @@ -959,15 +978,15 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) // Mapping from custom op name string to BuiltinOP enum std::map<std::string, BuiltinOP> builtin_map = { - {"AddV2", BuiltinOP::AddV2}, - {"All", BuiltinOP::ReduceAll}, - {"MatrixBandPart", BuiltinOP::MatrixBandPart}, - {"BatchMatMulV2", BuiltinOP::BatchMatMul}, - {"Einsum", BuiltinOP::Einsum}, - {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, - {"BroadcastTo", BuiltinOP::BroadcastTo}, - {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, - {"Erf", BuiltinOP::Erf}, + {"AddV2", BuiltinOP::AddV2}, + {"All", BuiltinOP::ReduceAll}, + {"MatrixBandPart", BuiltinOP::MatrixBandPart}, + {"BatchMatMulV2", BuiltinOP::BatchMatMul}, + {"Einsum", BuiltinOP::Einsum}, + {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, + {"BroadcastTo", BuiltinOP::BroadcastTo}, + {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, + {"Erf", BuiltinOP::Erf}, }; try @@ -1005,7 +1024,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) break; default: throw std::runtime_error{ - "Loader: Custom OP map is defined but operation loader function is not defined"}; + "Loader: Custom OP map is defined but operation loader function is not defined"}; } return; @@ -1120,7 +1139,7 @@ void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &sub break; default: throw std::runtime_error( - std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); } loadOperationTo<ir::operation::Comparison>(op, subg, param); @@ -1224,25 +1243,15 @@ void BaseLoader<LoaderDomain>::loadWhile(const Operator *op, ir::Graph &subg) } template <typename LoaderDomain> -void BaseLoader<LoaderDomain>::loadArgMax(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain>::loadArgMinMax(const Operator *op, ir::Graph &subg, bool is_argmax) { - ir::operation::ArgMax::Param param; - const auto output_type = op->builtin_options_as_ArgMaxOptions()->output_type(); - switch (output_type) - { - case TensorType::TensorType_INT32: - case TensorType::TensorType_INT64: - param.output_type = tensorTypeToDataType(output_type); - break; - default: - throw 
std::runtime_error("ArgMax: `output_type` must be either int32 or int64."); - } - auto am = loadOperationTo<ir::operation::ArgMax>(op, subg, param); + ir::operation::ArgMinMax::Param param; + const auto output_type = is_argmax ? op->builtin_options_as_ArgMaxOptions()->output_type() + : op->builtin_options_as_ArgMinOptions()->output_type(); + param.output_type = tensorTypeToDataType(output_type); + param.is_arg_max = is_argmax; - auto &axisOperand = subg.operands().at(am->getInputs().at(ir::operation::ArgMax::Input::AXIS)); - if (!(axisOperand.operandSize() == 4 && (axisOperand.typeInfo().type() == ir::DataType::INT32 || - axisOperand.typeInfo().type() == ir::DataType::INT64))) - throw std::runtime_error("ArgMax: `axis` with an int32 or int64 element is only supported."); + loadOperationTo<ir::operation::ArgMinMax>(op, subg, param); } template <typename LoaderDomain> @@ -1287,7 +1296,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op { auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ") - .append(EnumNameBuiltinOperator(builtin_code))); + .append(EnumNameBuiltinOperator(builtin_code))); } for (size_t i = 0; i < ir::operation::LSTM::Output::OUTPUT; ++i) { @@ -1355,6 +1364,9 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_PACK: loadPack(op, subg); return; + case BuiltinOperator::BuiltinOperator_ELU: + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::ELU); + return; case BuiltinOperator::BuiltinOperator_RELU: loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, ir::operation::ElementwiseActivation::infinity, 0.f); @@ -1383,6 +1395,9 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_SQRT: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT); return; + case BuiltinOperator::BuiltinOperator_SQUARE: + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQUARE); + return; case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE: loadOperationTo<ir::operation::SquaredDifference>(op, subg); return; @@ -1499,7 +1514,10 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG); return; case BuiltinOperator::BuiltinOperator_ARG_MAX: - loadArgMax(op, subg); + loadArgMinMax(op, subg, true); + return; + case BuiltinOperator::BuiltinOperator_ARG_MIN: + loadArgMinMax(op, subg, false); return; case BuiltinOperator::BuiltinOperator_LOG: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG); @@ -1513,6 +1531,10 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case BuiltinOperator::BuiltinOperator_LOGICAL_NOT: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT); return; + case BuiltinOperator::BuiltinOperator_LOGICAL_AND: + loadElementwiseBinary(op, subg, + ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); + return; case BuiltinOperator::BuiltinOperator_LOGICAL_OR: loadElementwiseBinary(op, subg, ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); @@ -1556,9 +1578,12 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg case 
BuiltinOperator::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: loadUnidirectionalSequenceLSTM(op, subg); return; + case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE: + loadDepthToSpace(op, subg); + return; default: throw std::runtime_error( - std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); + std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); } } diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index 33e1709a8..0d7b3eab4 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -196,7 +196,7 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg) param.activation = convertActivation(options->fused_activation_function()); std::unique_ptr<ir::Operation> new_op( - new ir::operation::BCQFullyConnected(inputs, outputs, param)); + new ir::operation::BCQFullyConnected(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } diff --git a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h index 0ff1f72a2..eb1775297 100644 --- a/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h +++ b/runtime/onert/frontend/circle_schema/include/circle_schema_generated.h @@ -2155,9 +2155,8 @@ enum ActivationFunctionType inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static const ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; return values; } @@ -2218,9 +2217,8 @@ enum FullyConnectedOptionsWeightsFormat inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[3] { static const FullyConnectedOptionsWeightsFormat values[] = { - FullyConnectedOptionsWeightsFormat_DEFAULT, - FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8, - FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32}; + FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8, + FullyConnectedOptionsWeightsFormat_SHUFFLED16x1FLOAT32}; return values; } @@ -2478,8 +2476,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab const circle::CustomQuantization *details_as_CustomQuantization() const { return details_type() == circle::QuantizationDetails_CustomQuantization - ? static_cast<const circle::CustomQuantization *>(details()) - : nullptr; + ? 
static_cast<const circle::CustomQuantization *>(details()) + : nullptr; } int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2551,12 +2549,12 @@ struct QuantizationParametersBuilder }; inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters( - flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, - flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, - flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, - flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, - circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> max = 0, + flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0, + flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0, + circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { QuantizationParametersBuilder builder_(_fbb); builder_.add_quantized_dimension(quantized_dimension); @@ -2570,11 +2568,11 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters( } inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, - const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, - const std::vector<int64_t> *zero_point = nullptr, - circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, + const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, + const std::vector<int64_t> *zero_point = nullptr, + circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { auto min__ = min ? _fbb.CreateVector<float>(*min) : 0; auto max__ = max ? _fbb.CreateVector<float>(*max) : 0; @@ -2789,20 +2787,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Int32Vector *array_segments_as_Int32Vector() const { return array_segments_type() == circle::SparseIndexVector_Int32Vector - ? static_cast<const circle::Int32Vector *>(array_segments()) - : nullptr; + ? static_cast<const circle::Int32Vector *>(array_segments()) + : nullptr; } const circle::Uint16Vector *array_segments_as_Uint16Vector() const { return array_segments_type() == circle::SparseIndexVector_Uint16Vector - ? static_cast<const circle::Uint16Vector *>(array_segments()) - : nullptr; + ? static_cast<const circle::Uint16Vector *>(array_segments()) + : nullptr; } const circle::Uint8Vector *array_segments_as_Uint8Vector() const { return array_segments_type() == circle::SparseIndexVector_Uint8Vector - ? static_cast<const circle::Uint8Vector *>(array_segments()) - : nullptr; + ? 
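// Hedged usage sketch of the CreateQuantizationParametersDirect convenience
// wrapper above; the per-channel values are illustrative, and the remaining
// arguments fall back to the defaults shown in the signature:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   std::vector<float> scale{0.0078125f};
//   std::vector<int64_t> zero_point{128};
//   auto quant = circle::CreateQuantizationParametersDirect(
//       fbb, /*min=*/nullptr, /*max=*/nullptr, &scale, &zero_point);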
static_cast<const circle::Uint8Vector *>(array_segments()) + : nullptr; } circle::SparseIndexVector array_indices_type() const { @@ -2813,20 +2811,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Int32Vector *array_indices_as_Int32Vector() const { return array_indices_type() == circle::SparseIndexVector_Int32Vector - ? static_cast<const circle::Int32Vector *>(array_indices()) - : nullptr; + ? static_cast<const circle::Int32Vector *>(array_indices()) + : nullptr; } const circle::Uint16Vector *array_indices_as_Uint16Vector() const { return array_indices_type() == circle::SparseIndexVector_Uint16Vector - ? static_cast<const circle::Uint16Vector *>(array_indices()) - : nullptr; + ? static_cast<const circle::Uint16Vector *>(array_indices()) + : nullptr; } const circle::Uint8Vector *array_indices_as_Uint8Vector() const { return array_indices_type() == circle::SparseIndexVector_Uint8Vector - ? static_cast<const circle::Uint8Vector *>(array_indices()) - : nullptr; + ? static_cast<const circle::Uint8Vector *>(array_indices()) + : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2924,12 +2922,12 @@ struct DimensionMetadataBuilder }; inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata( - flatbuffers::FlatBufferBuilder &_fbb, - circle::DimensionType format = circle::DimensionType_DENSE, int32_t dense_size = 0, - circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE, - flatbuffers::Offset<void> array_segments = 0, - circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE, - flatbuffers::Offset<void> array_indices = 0) + flatbuffers::FlatBufferBuilder &_fbb, circle::DimensionType format = circle::DimensionType_DENSE, + int32_t dense_size = 0, + circle::SparseIndexVector array_segments_type = circle::SparseIndexVector_NONE, + flatbuffers::Offset<void> array_segments = 0, + circle::SparseIndexVector array_indices_type = circle::SparseIndexVector_NONE, + flatbuffers::Offset<void> array_indices = 0) { DimensionMetadataBuilder builder_(_fbb); builder_.add_array_indices(array_indices); @@ -2961,7 +2959,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>> *>( - VT_DIM_METADATA); + VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2987,8 +2985,8 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> - dim_metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> + dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -3006,11 +3004,11 @@ struct SparsityParametersBuilder }; inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> - dim_metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, 
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>> + dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -3020,16 +3018,15 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( } inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, - const std::vector<int32_t> *block_map = nullptr, - const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, + const std::vector<int32_t> *block_map = nullptr, + const std::vector<flatbuffers::Offset<circle::DimensionMetadata>> *dim_metadata = nullptr) { auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0; auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0; auto dim_metadata__ = - dim_metadata - ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata) - : 0; + dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::DimensionMetadata>>(*dim_metadata) + : 0; return circle::CreateSparsityParameters(_fbb, traversal_order__, block_map__, dim_metadata__); } @@ -3155,12 +3152,11 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset<Tensor> CreateTensorDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, - circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0, - const char *name = nullptr, - flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, bool is_variable = false, - flatbuffers::Offset<circle::SparsityParameters> sparsity = 0, - const std::vector<int32_t> *shape_signature = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, + circle::TensorType type = circle::TensorType_FLOAT32, uint32_t buffer = 0, + const char *name = nullptr, flatbuffers::Offset<circle::QuantizationParameters> quantization = 0, + bool is_variable = false, flatbuffers::Offset<circle::SparsityParameters> sparsity = 0, + const std::vector<int32_t> *shape_signature = nullptr) { auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0; auto name__ = name ? 
_fbb.CreateString(name) : 0; @@ -3190,7 +3186,7 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } @@ -3249,10 +3245,10 @@ struct Conv2DOptionsBuilder }; inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { Conv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -3287,7 +3283,7 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -3344,9 +3340,9 @@ struct Pool2DOptionsBuilder }; inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { Pool2DOptionsBuilder builder_(_fbb); builder_.add_filter_height(filter_height); @@ -3381,7 +3377,7 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); } int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); } @@ -3445,10 +3441,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME, + 
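// Hedged usage sketch of the generated option builders above; the argument
// values are illustrative:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   auto conv_opts = circle::CreateConv2DOptions(
//       fbb, circle::Padding_SAME, /*stride_w=*/1, /*stride_h=*/1,
//       circle::ActivationFunctionType_RELU,
//       /*dilation_w_factor=*/1, /*dilation_h_factor=*/1);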
int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -3499,12 +3495,12 @@ struct ConcatEmbeddingsOptionsBuilder fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } void add_num_columns_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } void add_embedding_dim_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); @@ -3523,9 +3519,9 @@ struct ConcatEmbeddingsOptionsBuilder }; inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) + flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); @@ -3540,9 +3536,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector<int32_t> *embedding_dim_per_channel = nullptr) { auto num_columns_per_channel__ = - num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0; + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0; auto embedding_dim_per_channel__ = - embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0; + embedding_dim_per_channel ? 
_fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0; return circle::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__, embedding_dim_per_channel__); } @@ -3609,7 +3605,7 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3653,9 +3649,9 @@ struct SVDFOptionsBuilder }; inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SVDFOptionsBuilder builder_(_fbb); builder_.add_rank(rank); @@ -3675,7 +3671,7 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3718,9 +3714,9 @@ struct RNNOptionsBuilder }; inline flatbuffers::Offset<RNNOptions> CreateRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { RNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3741,7 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool asymmetric_quantize_inputs() const { @@ -3789,9 +3785,9 @@ struct SequenceRNNOptionsBuilder }; inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3814,7 +3810,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3869,9 
+3865,9 @@ struct BidirectionalSequenceRNNOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - bool merge_outputs = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3894,12 +3890,12 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } circle::FullyConnectedOptionsWeightsFormat weights_format() const { return static_cast<circle::FullyConnectedOptionsWeightsFormat>( - GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); + GetField<int8_t>(VT_WEIGHTS_FORMAT, 0)); } bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; } bool asymmetric_quantize_inputs() const @@ -3955,11 +3951,11 @@ struct FullyConnectedOptionsBuilder }; inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - circle::FullyConnectedOptionsWeightsFormat weights_format = - circle::FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + circle::FullyConnectedOptionsWeightsFormat weights_format = + circle::FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -4023,7 +4019,7 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4057,8 +4053,8 @@ struct ConcatenationOptionsBuilder }; inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -4076,7 +4072,7 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + 
GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4109,8 +4105,8 @@ struct AddOptionsBuilder }; inline flatbuffers::Offset<AddOptions> CreateAddOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { AddOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4127,7 +4123,7 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4160,8 +4156,8 @@ struct MulOptionsBuilder }; inline flatbuffers::Offset<MulOptions> CreateMulOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { MulOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4178,7 +4174,7 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -4211,8 +4207,8 @@ struct L2NormOptionsBuilder }; inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { L2NormOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -4263,7 +4259,7 @@ struct LocalResponseNormalizationOptionsBuilder fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4303,7 +4299,7 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -4367,11 +4363,11 @@ struct LSTMOptionsBuilder }; inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, - circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + 
circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, + circle::LSTMKernelType kernel_type = circle::LSTMKernelType_FULL, + bool asymmetric_quantize_inputs = false) { LSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4396,7 +4392,7 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -4445,7 +4441,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4461,10 +4457,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, + bool asymmetric_quantize_inputs = false) { UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4490,7 +4486,7 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); } float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); } @@ -4546,7 +4542,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -4561,10 +4557,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, - bool time_major = true, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, + bool time_major = true, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -5075,7 +5071,7 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table 
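// Hedged usage sketch: note the differing defaults in the two signatures
// above -- CreateUnidirectionalSequenceLSTMOptions defaults time_major to
// false, while CreateBidirectionalSequenceLSTMOptions defaults it to true:
//
//   flatbuffers::FlatBufferBuilder fbb;
//   auto lstm_opts = circle::CreateUnidirectionalSequenceLSTMOptions(
//       fbb, circle::ActivationFunctionType_TANH,
//       /*cell_clip=*/0.0f, /*proj_clip=*/0.0f, /*time_major=*/true);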
circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5108,8 +5104,8 @@ struct SubOptionsBuilder }; inline flatbuffers::Offset<SubOptions> CreateSubOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { SubOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -5126,7 +5122,7 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -5159,8 +5155,8 @@ struct DivOptionsBuilder }; inline flatbuffers::Offset<DivOptions> CreateDivOptions( - flatbuffers::FlatBufferBuilder &_fbb, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { DivOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); @@ -7976,7 +7972,7 @@ struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8014,8 +8010,8 @@ struct BCQFullyConnectedOptionsBuilder }; inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { BCQFullyConnectedOptionsBuilder builder_(_fbb); builder_.add_weights_hidden_size(weights_hidden_size); @@ -8035,7 +8031,7 @@ struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table circle::ActivationFunctionType fused_activation_function() const { return static_cast<circle::ActivationFunctionType>( - GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -8072,8 +8068,8 @@ struct InstanceNormOptionsBuilder }; inline flatbuffers::Offset<InstanceNormOptions> CreateInstanceNormOptions( - flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, - circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, float epsilon = 0.0f, + circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE) { InstanceNormOptionsBuilder builder_(_fbb); builder_.add_epsilon(epsilon); @@ -8191,632 +8187,632 @@ 
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const circle::Conv2DOptions *builtin_options_as_Conv2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_Conv2DOptions - ? static_cast<const circle::Conv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::Conv2DOptions *>(builtin_options()) + : nullptr; } const circle::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_DepthwiseConv2DOptions - ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DepthwiseConv2DOptions *>(builtin_options()) + : nullptr; } const circle::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { return builtin_options_type() == circle::BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ConcatEmbeddingsOptions *>(builtin_options()) + : nullptr; } const circle::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { return builtin_options_type() == circle::BuiltinOptions_LSHProjectionOptions - ? static_cast<const circle::LSHProjectionOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LSHProjectionOptions *>(builtin_options()) + : nullptr; } const circle::Pool2DOptions *builtin_options_as_Pool2DOptions() const { return builtin_options_type() == circle::BuiltinOptions_Pool2DOptions - ? static_cast<const circle::Pool2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::Pool2DOptions *>(builtin_options()) + : nullptr; } const circle::SVDFOptions *builtin_options_as_SVDFOptions() const { return builtin_options_type() == circle::BuiltinOptions_SVDFOptions - ? static_cast<const circle::SVDFOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SVDFOptions *>(builtin_options()) + : nullptr; } const circle::RNNOptions *builtin_options_as_RNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_RNNOptions - ? static_cast<const circle::RNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::RNNOptions *>(builtin_options()) + : nullptr; } const circle::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { return builtin_options_type() == circle::BuiltinOptions_FullyConnectedOptions - ? static_cast<const circle::FullyConnectedOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FullyConnectedOptions *>(builtin_options()) + : nullptr; } const circle::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_SoftmaxOptions - ? static_cast<const circle::SoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SoftmaxOptions *>(builtin_options()) + : nullptr; } const circle::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { return builtin_options_type() == circle::BuiltinOptions_ConcatenationOptions - ? static_cast<const circle::ConcatenationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ConcatenationOptions *>(builtin_options()) + : nullptr; } const circle::AddOptions *builtin_options_as_AddOptions() const { return builtin_options_type() == circle::BuiltinOptions_AddOptions - ? static_cast<const circle::AddOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::AddOptions *>(builtin_options()) + : nullptr; } const circle::L2NormOptions *builtin_options_as_L2NormOptions() const { return builtin_options_type() == circle::BuiltinOptions_L2NormOptions - ? static_cast<const circle::L2NormOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::L2NormOptions *>(builtin_options()) + : nullptr; } const circle::LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { return builtin_options_type() == circle::BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LocalResponseNormalizationOptions *>(builtin_options()) + : nullptr; } const circle::LSTMOptions *builtin_options_as_LSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_LSTMOptions - ? static_cast<const circle::LSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LSTMOptions *>(builtin_options()) + : nullptr; } const circle::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { return builtin_options_type() == circle::BuiltinOptions_ResizeBilinearOptions - ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ResizeBilinearOptions *>(builtin_options()) + : nullptr; } const circle::CallOptions *builtin_options_as_CallOptions() const { return builtin_options_type() == circle::BuiltinOptions_CallOptions - ? static_cast<const circle::CallOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::CallOptions *>(builtin_options()) + : nullptr; } const circle::ReshapeOptions *builtin_options_as_ReshapeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReshapeOptions - ? static_cast<const circle::ReshapeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ReshapeOptions *>(builtin_options()) + : nullptr; } const circle::SkipGramOptions *builtin_options_as_SkipGramOptions() const { return builtin_options_type() == circle::BuiltinOptions_SkipGramOptions - ? static_cast<const circle::SkipGramOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SkipGramOptions *>(builtin_options()) + : nullptr; } const circle::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { return builtin_options_type() == circle::BuiltinOptions_SpaceToDepthOptions - ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SpaceToDepthOptions *>(builtin_options()) + : nullptr; } const circle::EmbeddingLookupSparseOptions * builtin_options_as_EmbeddingLookupSparseOptions() const { return builtin_options_type() == circle::BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::EmbeddingLookupSparseOptions *>(builtin_options()) + : nullptr; } const circle::MulOptions *builtin_options_as_MulOptions() const { return builtin_options_type() == circle::BuiltinOptions_MulOptions - ? static_cast<const circle::MulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MulOptions *>(builtin_options()) + : nullptr; } const circle::PadOptions *builtin_options_as_PadOptions() const { return builtin_options_type() == circle::BuiltinOptions_PadOptions - ? static_cast<const circle::PadOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::PadOptions *>(builtin_options()) + : nullptr; } const circle::GatherOptions *builtin_options_as_GatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_GatherOptions - ? static_cast<const circle::GatherOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::GatherOptions *>(builtin_options()) + : nullptr; } const circle::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { return builtin_options_type() == circle::BuiltinOptions_BatchToSpaceNDOptions - ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BatchToSpaceNDOptions *>(builtin_options()) + : nullptr; } const circle::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { return builtin_options_type() == circle::BuiltinOptions_SpaceToBatchNDOptions - ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SpaceToBatchNDOptions *>(builtin_options()) + : nullptr; } const circle::TransposeOptions *builtin_options_as_TransposeOptions() const { return builtin_options_type() == circle::BuiltinOptions_TransposeOptions - ? static_cast<const circle::TransposeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TransposeOptions *>(builtin_options()) + : nullptr; } const circle::ReducerOptions *builtin_options_as_ReducerOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReducerOptions - ? static_cast<const circle::ReducerOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ReducerOptions *>(builtin_options()) + : nullptr; } const circle::SubOptions *builtin_options_as_SubOptions() const { return builtin_options_type() == circle::BuiltinOptions_SubOptions - ? static_cast<const circle::SubOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SubOptions *>(builtin_options()) + : nullptr; } const circle::DivOptions *builtin_options_as_DivOptions() const { return builtin_options_type() == circle::BuiltinOptions_DivOptions - ? static_cast<const circle::DivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DivOptions *>(builtin_options()) + : nullptr; } const circle::SqueezeOptions *builtin_options_as_SqueezeOptions() const { return builtin_options_type() == circle::BuiltinOptions_SqueezeOptions - ? static_cast<const circle::SqueezeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SqueezeOptions *>(builtin_options()) + : nullptr; } const circle::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_SequenceRNNOptions - ? static_cast<const circle::SequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SequenceRNNOptions *>(builtin_options()) + : nullptr; } const circle::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { return builtin_options_type() == circle::BuiltinOptions_StridedSliceOptions - ? static_cast<const circle::StridedSliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::StridedSliceOptions *>(builtin_options()) + : nullptr; } const circle::ExpOptions *builtin_options_as_ExpOptions() const { return builtin_options_type() == circle::BuiltinOptions_ExpOptions - ? static_cast<const circle::ExpOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::ExpOptions *>(builtin_options()) + : nullptr; } const circle::TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == circle::BuiltinOptions_TopKV2Options - ? static_cast<const circle::TopKV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TopKV2Options *>(builtin_options()) + : nullptr; } const circle::SplitOptions *builtin_options_as_SplitOptions() const { return builtin_options_type() == circle::BuiltinOptions_SplitOptions - ? static_cast<const circle::SplitOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SplitOptions *>(builtin_options()) + : nullptr; } const circle::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogSoftmaxOptions - ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogSoftmaxOptions *>(builtin_options()) + : nullptr; } const circle::CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == circle::BuiltinOptions_CastOptions - ? static_cast<const circle::CastOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::CastOptions *>(builtin_options()) + : nullptr; } const circle::DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == circle::BuiltinOptions_DequantizeOptions - ? static_cast<const circle::DequantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DequantizeOptions *>(builtin_options()) + : nullptr; } const circle::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { return builtin_options_type() == circle::BuiltinOptions_MaximumMinimumOptions - ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MaximumMinimumOptions *>(builtin_options()) + : nullptr; } const circle::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { return builtin_options_type() == circle::BuiltinOptions_ArgMaxOptions - ? static_cast<const circle::ArgMaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ArgMaxOptions *>(builtin_options()) + : nullptr; } const circle::LessOptions *builtin_options_as_LessOptions() const { return builtin_options_type() == circle::BuiltinOptions_LessOptions - ? static_cast<const circle::LessOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LessOptions *>(builtin_options()) + : nullptr; } const circle::NegOptions *builtin_options_as_NegOptions() const { return builtin_options_type() == circle::BuiltinOptions_NegOptions - ? static_cast<const circle::NegOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NegOptions *>(builtin_options()) + : nullptr; } const circle::PadV2Options *builtin_options_as_PadV2Options() const { return builtin_options_type() == circle::BuiltinOptions_PadV2Options - ? static_cast<const circle::PadV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::PadV2Options *>(builtin_options()) + : nullptr; } const circle::GreaterOptions *builtin_options_as_GreaterOptions() const { return builtin_options_type() == circle::BuiltinOptions_GreaterOptions - ? static_cast<const circle::GreaterOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::GreaterOptions *>(builtin_options()) + : nullptr; } const circle::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_GreaterEqualOptions - ? static_cast<const circle::GreaterEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::GreaterEqualOptions *>(builtin_options()) + : nullptr; } const circle::LessEqualOptions *builtin_options_as_LessEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_LessEqualOptions - ? static_cast<const circle::LessEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LessEqualOptions *>(builtin_options()) + : nullptr; } const circle::SelectOptions *builtin_options_as_SelectOptions() const { return builtin_options_type() == circle::BuiltinOptions_SelectOptions - ? static_cast<const circle::SelectOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SelectOptions *>(builtin_options()) + : nullptr; } const circle::SliceOptions *builtin_options_as_SliceOptions() const { return builtin_options_type() == circle::BuiltinOptions_SliceOptions - ? static_cast<const circle::SliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SliceOptions *>(builtin_options()) + : nullptr; } const circle::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { return builtin_options_type() == circle::BuiltinOptions_TransposeConvOptions - ? static_cast<const circle::TransposeConvOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TransposeConvOptions *>(builtin_options()) + : nullptr; } const circle::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == circle::BuiltinOptions_SparseToDenseOptions - ? static_cast<const circle::SparseToDenseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SparseToDenseOptions *>(builtin_options()) + : nullptr; } const circle::TileOptions *builtin_options_as_TileOptions() const { return builtin_options_type() == circle::BuiltinOptions_TileOptions - ? static_cast<const circle::TileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::TileOptions *>(builtin_options()) + : nullptr; } const circle::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { return builtin_options_type() == circle::BuiltinOptions_ExpandDimsOptions - ? static_cast<const circle::ExpandDimsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ExpandDimsOptions *>(builtin_options()) + : nullptr; } const circle::EqualOptions *builtin_options_as_EqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_EqualOptions - ? static_cast<const circle::EqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::EqualOptions *>(builtin_options()) + : nullptr; } const circle::NotEqualOptions *builtin_options_as_NotEqualOptions() const { return builtin_options_type() == circle::BuiltinOptions_NotEqualOptions - ? static_cast<const circle::NotEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NotEqualOptions *>(builtin_options()) + : nullptr; } const circle::ShapeOptions *builtin_options_as_ShapeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ShapeOptions - ? static_cast<const circle::ShapeOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::ShapeOptions *>(builtin_options()) + : nullptr; } const circle::PowOptions *builtin_options_as_PowOptions() const { return builtin_options_type() == circle::BuiltinOptions_PowOptions - ? static_cast<const circle::PowOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::PowOptions *>(builtin_options()) + : nullptr; } const circle::ArgMinOptions *builtin_options_as_ArgMinOptions() const { return builtin_options_type() == circle::BuiltinOptions_ArgMinOptions - ? static_cast<const circle::ArgMinOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ArgMinOptions *>(builtin_options()) + : nullptr; } const circle::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { return builtin_options_type() == circle::BuiltinOptions_FakeQuantOptions - ? static_cast<const circle::FakeQuantOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FakeQuantOptions *>(builtin_options()) + : nullptr; } const circle::PackOptions *builtin_options_as_PackOptions() const { return builtin_options_type() == circle::BuiltinOptions_PackOptions - ? static_cast<const circle::PackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::PackOptions *>(builtin_options()) + : nullptr; } const circle::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalOrOptions - ? static_cast<const circle::LogicalOrOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogicalOrOptions *>(builtin_options()) + : nullptr; } const circle::OneHotOptions *builtin_options_as_OneHotOptions() const { return builtin_options_type() == circle::BuiltinOptions_OneHotOptions - ? static_cast<const circle::OneHotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::OneHotOptions *>(builtin_options()) + : nullptr; } const circle::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalAndOptions - ? static_cast<const circle::LogicalAndOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogicalAndOptions *>(builtin_options()) + : nullptr; } const circle::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { return builtin_options_type() == circle::BuiltinOptions_LogicalNotOptions - ? static_cast<const circle::LogicalNotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LogicalNotOptions *>(builtin_options()) + : nullptr; } const circle::UnpackOptions *builtin_options_as_UnpackOptions() const { return builtin_options_type() == circle::BuiltinOptions_UnpackOptions - ? static_cast<const circle::UnpackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::UnpackOptions *>(builtin_options()) + : nullptr; } const circle::FloorDivOptions *builtin_options_as_FloorDivOptions() const { return builtin_options_type() == circle::BuiltinOptions_FloorDivOptions - ? static_cast<const circle::FloorDivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FloorDivOptions *>(builtin_options()) + : nullptr; } const circle::SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == circle::BuiltinOptions_SquareOptions - ? static_cast<const circle::SquareOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::SquareOptions *>(builtin_options()) + : nullptr; } const circle::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == circle::BuiltinOptions_ZerosLikeOptions - ? static_cast<const circle::ZerosLikeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ZerosLikeOptions *>(builtin_options()) + : nullptr; } const circle::FillOptions *builtin_options_as_FillOptions() const { return builtin_options_type() == circle::BuiltinOptions_FillOptions - ? static_cast<const circle::FillOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FillOptions *>(builtin_options()) + : nullptr; } const circle::BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const circle::BidirectionalSequenceRNNOptions * builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == circle::BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BidirectionalSequenceRNNOptions *>(builtin_options()) + : nullptr; } const circle::UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::UnidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const circle::FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == circle::BuiltinOptions_FloorModOptions - ? static_cast<const circle::FloorModOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::FloorModOptions *>(builtin_options()) + : nullptr; } const circle::RangeOptions *builtin_options_as_RangeOptions() const { return builtin_options_type() == circle::BuiltinOptions_RangeOptions - ? static_cast<const circle::RangeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::RangeOptions *>(builtin_options()) + : nullptr; } const circle::ResizeNearestNeighborOptions * builtin_options_as_ResizeNearestNeighborOptions() const { return builtin_options_type() == circle::BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ResizeNearestNeighborOptions *>(builtin_options()) + : nullptr; } const circle::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { return builtin_options_type() == circle::BuiltinOptions_LeakyReluOptions - ? static_cast<const circle::LeakyReluOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::LeakyReluOptions *>(builtin_options()) + : nullptr; } const circle::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { return builtin_options_type() == circle::BuiltinOptions_SquaredDifferenceOptions - ? static_cast<const circle::SquaredDifferenceOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::SquaredDifferenceOptions *>(builtin_options()) + : nullptr; } const circle::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { return builtin_options_type() == circle::BuiltinOptions_MirrorPadOptions - ? static_cast<const circle::MirrorPadOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MirrorPadOptions *>(builtin_options()) + : nullptr; } const circle::AbsOptions *builtin_options_as_AbsOptions() const { return builtin_options_type() == circle::BuiltinOptions_AbsOptions - ? static_cast<const circle::AbsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::AbsOptions *>(builtin_options()) + : nullptr; } const circle::SplitVOptions *builtin_options_as_SplitVOptions() const { return builtin_options_type() == circle::BuiltinOptions_SplitVOptions - ? static_cast<const circle::SplitVOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SplitVOptions *>(builtin_options()) + : nullptr; } const circle::UniqueOptions *builtin_options_as_UniqueOptions() const { return builtin_options_type() == circle::BuiltinOptions_UniqueOptions - ? static_cast<const circle::UniqueOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::UniqueOptions *>(builtin_options()) + : nullptr; } const circle::ReverseV2Options *builtin_options_as_ReverseV2Options() const { return builtin_options_type() == circle::BuiltinOptions_ReverseV2Options - ? static_cast<const circle::ReverseV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ReverseV2Options *>(builtin_options()) + : nullptr; } const circle::AddNOptions *builtin_options_as_AddNOptions() const { return builtin_options_type() == circle::BuiltinOptions_AddNOptions - ? static_cast<const circle::AddNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::AddNOptions *>(builtin_options()) + : nullptr; } const circle::GatherNdOptions *builtin_options_as_GatherNdOptions() const { return builtin_options_type() == circle::BuiltinOptions_GatherNdOptions - ? static_cast<const circle::GatherNdOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::GatherNdOptions *>(builtin_options()) + : nullptr; } const circle::CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == circle::BuiltinOptions_CosOptions - ? static_cast<const circle::CosOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::CosOptions *>(builtin_options()) + : nullptr; } const circle::WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == circle::BuiltinOptions_WhereOptions - ? static_cast<const circle::WhereOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::WhereOptions *>(builtin_options()) + : nullptr; } const circle::RankOptions *builtin_options_as_RankOptions() const { return builtin_options_type() == circle::BuiltinOptions_RankOptions - ? static_cast<const circle::RankOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::RankOptions *>(builtin_options()) + : nullptr; } const circle::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { return builtin_options_type() == circle::BuiltinOptions_ReverseSequenceOptions - ? static_cast<const circle::ReverseSequenceOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::ReverseSequenceOptions *>(builtin_options()) + : nullptr; } const circle::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { return builtin_options_type() == circle::BuiltinOptions_MatrixDiagOptions - ? static_cast<const circle::MatrixDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MatrixDiagOptions *>(builtin_options()) + : nullptr; } const circle::QuantizeOptions *builtin_options_as_QuantizeOptions() const { return builtin_options_type() == circle::BuiltinOptions_QuantizeOptions - ? static_cast<const circle::QuantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::QuantizeOptions *>(builtin_options()) + : nullptr; } const circle::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { return builtin_options_type() == circle::BuiltinOptions_MatrixSetDiagOptions - ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::MatrixSetDiagOptions *>(builtin_options()) + : nullptr; } const circle::HardSwishOptions *builtin_options_as_HardSwishOptions() const { return builtin_options_type() == circle::BuiltinOptions_HardSwishOptions - ? static_cast<const circle::HardSwishOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::HardSwishOptions *>(builtin_options()) + : nullptr; } const circle::IfOptions *builtin_options_as_IfOptions() const { return builtin_options_type() == circle::BuiltinOptions_IfOptions - ? static_cast<const circle::IfOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::IfOptions *>(builtin_options()) + : nullptr; } const circle::WhileOptions *builtin_options_as_WhileOptions() const { return builtin_options_type() == circle::BuiltinOptions_WhileOptions - ? static_cast<const circle::WhileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::WhileOptions *>(builtin_options()) + : nullptr; } const circle::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { return builtin_options_type() == circle::BuiltinOptions_DepthToSpaceOptions - ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DepthToSpaceOptions *>(builtin_options()) + : nullptr; } const circle::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NonMaxSuppressionV4Options *>(builtin_options()) + : nullptr; } const circle::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { return builtin_options_type() == circle::BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::NonMaxSuppressionV5Options *>(builtin_options()) + : nullptr; } const circle::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { return builtin_options_type() == circle::BuiltinOptions_ScatterNdOptions - ? static_cast<const circle::ScatterNdOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::ScatterNdOptions *>(builtin_options()) + : nullptr; } const circle::SelectV2Options *builtin_options_as_SelectV2Options() const { return builtin_options_type() == circle::BuiltinOptions_SelectV2Options - ? 
static_cast<const circle::SelectV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SelectV2Options *>(builtin_options()) + : nullptr; } const circle::DensifyOptions *builtin_options_as_DensifyOptions() const { return builtin_options_type() == circle::BuiltinOptions_DensifyOptions - ? static_cast<const circle::DensifyOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::DensifyOptions *>(builtin_options()) + : nullptr; } const circle::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { return builtin_options_type() == circle::BuiltinOptions_SegmentSumOptions - ? static_cast<const circle::SegmentSumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::SegmentSumOptions *>(builtin_options()) + : nullptr; } const circle::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == circle::BuiltinOptions_BatchMatMulOptions - ? static_cast<const circle::BatchMatMulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BatchMatMulOptions *>(builtin_options()) + : nullptr; } const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions - ? static_cast<const circle::BCQGatherOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BCQGatherOptions *>(builtin_options()) + : nullptr; } const circle::BCQFullyConnectedOptions *builtin_options_as_BCQFullyConnectedOptions() const { return builtin_options_type() == circle::BuiltinOptions_BCQFullyConnectedOptions - ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options()) - : nullptr; + ? static_cast<const circle::BCQFullyConnectedOptions *>(builtin_options()) + : nullptr; } const circle::InstanceNormOptions *builtin_options_as_InstanceNormOptions() const { return builtin_options_type() == circle::BuiltinOptions_InstanceNormOptions - ? static_cast<const circle::InstanceNormOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const circle::InstanceNormOptions *>(builtin_options()) + : nullptr; } const flatbuffers::Vector<uint8_t> *custom_options() const { @@ -9558,7 +9554,7 @@ struct OperatorBuilder static_cast<int8_t>(custom_options_format), 0); } void add_mutating_variable_inputs( - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) { fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); } @@ -9580,15 +9576,15 @@ struct OperatorBuilder }; inline flatbuffers::Offset<Operator> CreateOperator( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, - flatbuffers::Offset<void> builtin_options = 0, - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, - circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0, + circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) { OperatorBuilder builder_(_fbb); builder_.add_intermediates(intermediates); @@ -9604,20 +9600,20 @@ inline flatbuffers::Offset<Operator> CreateOperator( } inline flatbuffers::Offset<Operator> CreateOperatorDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, - const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, - circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, - flatbuffers::Offset<void> builtin_options = 0, - const std::vector<uint8_t> *custom_options = nullptr, - circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, - const std::vector<uint8_t> *mutating_variable_inputs = nullptr, - const std::vector<int32_t> *intermediates = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + circle::BuiltinOptions builtin_options_type = circle::BuiltinOptions_NONE, + flatbuffers::Offset<void> builtin_options = 0, + const std::vector<uint8_t> *custom_options = nullptr, + circle::CustomOptionsFormat custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS, + const std::vector<uint8_t> *mutating_variable_inputs = nullptr, + const std::vector<int32_t> *intermediates = nullptr) { auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0; auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0; auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0; auto mutating_variable_inputs__ = - mutating_variable_inputs ? 
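
CreateOperatorDirect below is the convenience form of CreateOperator: it copies plain std::vector arguments into FlatBuffers vectors (a null pointer becomes offset 0) and then forwards to the builder-based overload. A sketch of serializing a single operator with it; the opcode and tensor indices are placeholders for illustration only:

  #include <vector>
  #include "circle_schema_generated.h"

  // Sketch only: serialize one operator with the Direct convenience form.
  // Remaining arguments keep their defaults (no builtin or custom options).
  flatbuffers::Offset<circle::Operator> buildOperator(flatbuffers::FlatBufferBuilder &fbb)
  {
    std::vector<int32_t> inputs{0, 1}; // placeholder tensor indices
    std::vector<int32_t> outputs{2};
    return circle::CreateOperatorDirect(fbb, /*opcode_index=*/0, &inputs, &outputs);
  }
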
_fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0; + mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0; auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0; return circle::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type, builtin_options, custom_options__, custom_options_format, @@ -9651,7 +9647,7 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *operators() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Operator>> *>( - VT_OPERATORS); + VT_OPERATORS); } const flatbuffers::String *name() const { @@ -9693,7 +9689,7 @@ struct SubGraphBuilder fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); } void add_operators( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators) { fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); } @@ -9719,13 +9715,13 @@ struct SubGraphBuilder }; inline flatbuffers::Offset<SubGraph> CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0, - flatbuffers::Offset<flatbuffers::String> name = 0, - circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::String> name = 0, + circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -9738,17 +9734,17 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph( } inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr, - const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, - const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr, - const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<circle::Tensor>> *tensors = nullptr, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + const std::vector<flatbuffers::Offset<circle::Operator>> *operators = nullptr, + const char *name = nullptr, circle::DataFormat data_format = circle::DataFormat_CHANNELS_LAST) { auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<circle::Tensor>>(*tensors) : 0; auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0; auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0; auto operators__ = - operators ? _fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0; + operators ? 
_fbb.CreateVector<flatbuffers::Offset<circle::Operator>>(*operators) : 0; auto name__ = name ? _fbb.CreateString(name) : 0; return circle::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__, data_format); @@ -9893,12 +9889,12 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *>( - VT_OPERATOR_CODES); + VT_OPERATOR_CODES); } const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>> *>( - VT_SUBGRAPHS); + VT_SUBGRAPHS); } const flatbuffers::String *description() const { @@ -9915,7 +9911,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *metadata() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>( - VT_METADATA); + VT_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -9939,13 +9935,13 @@ struct ModelBuilder flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> - operator_codes) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> + operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } void add_subgraphs( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs) { fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); } @@ -9963,7 +9959,7 @@ struct ModelBuilder fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); } void add_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata) { fbb_.AddOffset(Model::VT_METADATA, metadata); } @@ -9981,14 +9977,14 @@ struct ModelBuilder }; inline flatbuffers::Offset<Model> CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> - operator_codes = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0, - flatbuffers::Offset<flatbuffers::String> description = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>>> + operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::String> description = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0) { ModelBuilder builder_(_fbb); 
builder_.add_metadata(metadata); @@ -10002,24 +9998,24 @@ inline flatbuffers::Offset<Model> CreateModel( } inline flatbuffers::Offset<Model> CreateModelDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr, - const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr, - const char *description = nullptr, - const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr, - const std::vector<int32_t> *metadata_buffer = nullptr, - const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + const std::vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes = nullptr, + const std::vector<flatbuffers::Offset<circle::SubGraph>> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr, + const std::vector<int32_t> *metadata_buffer = nullptr, + const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr) { auto operator_codes__ = - operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes) - : 0; + operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes) + : 0; auto subgraphs__ = - subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0; + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<circle::SubGraph>>(*subgraphs) : 0; auto description__ = description ? _fbb.CreateString(description) : 0; auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<circle::Buffer>>(*buffers) : 0; auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0; auto metadata__ = - metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0; + metadata ? 
_fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0; return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__, metadata_buffer__, metadata__); } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc index 81cd38f4f..63036a398 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc @@ -20,7 +20,9 @@ // TODO Support multiple subgraphs ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept - : _subgraphs{model->getSubGraphs()}, _compiler{new onert::compiler::Compiler{_subgraphs}} + : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>( + _subgraphs.get())}, + _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}} { if (model->allowedToFp16()) { diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h index 5f0650b9a..bd61f9d86 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h @@ -23,6 +23,7 @@ #include "ir/Graph.h" #include "ir/Subgraphs.h" #include "exec/IExecutor.h" +#include "util/TracingCtx.h" struct ANeuralNetworksCompilation { @@ -40,6 +41,14 @@ public: private: std::shared_ptr<onert::ir::Subgraphs> _subgraphs; + // TODO Refine the ownership of TracingCtx + // In case of nnfw API, nnfw_session has ownership of TracingCtx. + // In case of nnapi, there is no concept of session and primary model might have the ownership + // of TracingCtx. + // Since we don't support multiple models yet with nnapi in ONE, let's implement this later + // and let's make it work with one model for now. + std::unique_ptr<onert::util::TracingCtx> _tracing_ctx; + std::shared_ptr<onert::compiler::Compiler> _compiler; std::shared_ptr<onert::exec::ExecutorMap> _executors; }; diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc index 2bea729be..b0ea51917 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksEvent.cc @@ -20,7 +20,7 @@ #include "util/logging.h" ANeuralNetworksEvent::ANeuralNetworksEvent(const std::shared_ptr<onert::exec::Execution> &execution) - : _execution{execution} + : _execution{execution} { // DO NOTHING } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc index 6114b74b0..21c7cdd6f 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.cc @@ -140,8 +140,8 @@ bool ANeuralNetworksExecution::setInput(uint32_t index, const ANeuralNetworksOpe const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // NOTE The nnapi does not provide setting io_layout and not support changing layout. 
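
The ownership arrangement introduced in ANeuralNetworksCompilation above is worth spelling out: the compilation owns the TracingCtx through a unique_ptr and hands the Compiler only a raw, non-owning pointer, which is why the member declaration order in the header (_subgraphs, then _tracing_ctx, then _compiler) matters — members initialize top to bottom. A reduced sketch of the same pattern with hypothetical stand-in types, not the real onert classes:

  #include <memory>

  struct Subgraphs { };                                      // stand-in for onert::ir::Subgraphs
  struct TracingCtx { explicit TracingCtx(Subgraphs *) {} }; // stand-in for onert::util::TracingCtx
  struct Compiler { Compiler(std::shared_ptr<Subgraphs>, TracingCtx *) {} };

  class Compilation
  {
  public:
    explicit Compilation(std::shared_ptr<Subgraphs> subgraphs)
      : _subgraphs{std::move(subgraphs)},
        _tracing_ctx{std::make_unique<TracingCtx>(_subgraphs.get())},
        _compiler{std::make_shared<Compiler>(_subgraphs, _tracing_ctx.get())}
    {
    }

  private:
    // Declaration order matters: _tracing_ctx dereferences _subgraphs and
    // _compiler borrows _tracing_ctx, so each must be initialized first.
    std::shared_ptr<Subgraphs> _subgraphs;
    std::unique_ptr<TracingCtx> _tracing_ctx;
    std::shared_ptr<Compiler> _compiler;
  };
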
In other // words, we can assume that io_layout from nnapi always is the same as layout of the used @@ -173,8 +173,8 @@ bool ANeuralNetworksExecution::setOptionalInput(uint32_t index, const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // ANeuralNetworksExecution::setInput() uses only shape information ANeuralNetworksOperandType optional_input_type; @@ -208,8 +208,8 @@ bool ANeuralNetworksExecution::setOutput(uint32_t index, const ANeuralNetworksOp const auto type_info = _execution->primary_subgraph().operands().at(operand_index).typeInfo(); const auto shape = (type != nullptr) - ? NNAPIConvert::getShape(type) - : _execution->primary_subgraph().operands().at(operand_index).shape(); + ? NNAPIConvert::getShape(type) + : _execution->primary_subgraph().operands().at(operand_index).shape(); // NOTE The nnapi does not provide setting io_layout and not support changing layout. In other // words, we can assume that io_layout from nnapi always is the same as layout of the used diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h index 1f4b868f6..70c5d2a4b 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h @@ -27,7 +27,7 @@ struct ANeuralNetworksExecution { public: ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors) - : _execution{std::make_shared<onert::exec::Execution>(executors)} + : _execution{std::make_shared<onert::exec::Execution>(executors)} { // DO NOTHING } diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc index 97b820aea..3e2bea114 100644 --- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc +++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc @@ -27,7 +27,7 @@ // ANeuralNetworksModel // ANeuralNetworksModel::ANeuralNetworksModel() noexcept - : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false} + : _optional_operands{}, _operand_usages{}, _allowFloat32toFloat16{false} { _graph = std::make_shared<onert::ir::Graph>(); } @@ -72,12 +72,12 @@ bool ANeuralNetworksModel::setOperandValue(uint32_t index, const void *buffer, s if (copy) { _graph->operands().at(ind).data( - std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length)); + std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length)); } else { _graph->operands().at(ind).data( - std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length)); + std::make_unique<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length)); } } catch (const std::exception &e) @@ -111,9 +111,9 @@ bool ANeuralNetworksModel::addOperation(ANeuralNetworksOperationType type, uint3 if (type == ANEURALNETWORKS_FULLY_CONNECTED) { const auto &input_operand = - _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT)); + _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::INPUT)); auto &weights_operand = - _graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT)); + 
_graph->operands().at(node->getInputs().at(onert::ir::operation::FullyConnected::WEIGHT)); if (input_operand.typeInfo().type() == onert::ir::DataType::FLOAT32 && weights_operand.typeInfo().type() == onert::ir::DataType::QUANT_UINT8_ASYMM) { diff --git a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc index 63d4e3c09..94b8f02f5 100644 --- a/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc +++ b/runtime/onert/frontend/nnapi/wrapper/NNAPIConvert.cc @@ -39,6 +39,13 @@ DataType NNAPIConvert::getDataType(OperandCode type) case ANEURALNETWORKS_BOOL: case ANEURALNETWORKS_TENSOR_BOOL8: return DataType::BOOL8; + case ANEURALNETWORKS_TENSOR_FLOAT16: + case ANEURALNETWORKS_FLOAT16: + return DataType::FLOAT16; + case ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL: + return DataType::QUANT_INT8_SYMM_PER_CHANNEL; + case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED: + return DataType::QUANT_INT8_ASYMM; default: throw std::runtime_error("Unsupported type"); } diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc index a84ce1b8d..9ecb7d190 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc @@ -107,7 +107,7 @@ getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivat } OperationFactory::Generator getElementwiseBinaryGenerator( - const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) + const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) { return [op_type](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2); @@ -182,7 +182,7 @@ getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::Arith param.arithmetic_type = op_type; const auto activation_index = OperandIndex{init_param.inputs[2]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); return new operation::BinaryArithmetic{inputs, outputs, param}; }; @@ -221,12 +221,12 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type) const auto activation_index = OperandIndex{init_param.inputs[6]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); param.kw = getUint32Scalar(operands, kw_index); param.kh = operands.at(kh_index).asScalar<uint32_t>(); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else // support explicit padding { @@ -259,7 +259,7 @@ getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type) param.kw = getUint32Scalar(operands, kw_index); param.kh = getUint32Scalar(operands, kh_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } return new operation::Pool2D{inputs, outputs, param}; }; @@ -382,11 +382,11 @@ OperationFactory::OperationFactory() const auto activation_index = OperandIndex{init_param.inputs[7]}; param.padding.type = - 
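
The new cases added to NNAPIConvert::getDataType above extend the NNAPI-to-onert type mapping: both the scalar and tensor FLOAT16 codes collapse to DataType::FLOAT16, the per-channel symmetric code maps to QUANT_INT8_SYMM_PER_CHANNEL, the signed asymmetric code to QUANT_INT8_ASYMM, and anything unlisted still throws. A hedged usage sketch (include path illustrative) that turns the throwing contract into a support probe:

  #include <stdexcept>
  #include "NNAPIConvert.h" // illustrative include path

  // Sketch: probe whether the runtime models a given NNAPI operand code.
  // getDataType() throws std::runtime_error for unlisted codes, as the
  // switch above shows.
  bool isSupportedOperandCode(OperandCode code)
  {
    try
    {
      (void)NNAPIConvert::getDataType(code);
      return true;
    }
    catch (const std::runtime_error &)
    {
      return false;
    }
  }
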
NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); param.multiplier = getUint32Scalar(operands, multiplier_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else { @@ -417,7 +417,7 @@ OperationFactory::OperationFactory() param.stride = makeStride(operands, hstride_index, vstride_index); param.multiplier = getUint32Scalar(operands, multiplier_index); param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } // TODO set dilation @@ -490,7 +490,7 @@ OperationFactory::OperationFactory() operation::FullyConnected::Param param; const auto activation_index = OperandIndex{init_param.inputs[3]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); param.weights_format = FullyConnectedWeightsFormat::Default; return new operation::FullyConnected{inputs, outputs, param}; @@ -517,7 +517,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_CAST] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); // ANEURALNETWORKS_CAST_EX is deprecated // TODO Remove ANEURALNETWORKS_CAST_EX @@ -557,14 +557,14 @@ OperationFactory::OperationFactory() const auto activation_index = OperandIndex{init_param.inputs[6]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); param.dilation.width_factor = 1; param.dilation.height_factor = 1; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else if (init_param.input_count == 10) // support explicit padding { @@ -595,7 +595,7 @@ OperationFactory::OperationFactory() param.dilation.height_factor = 1; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else if (init_param.input_count == 13) // support dilation { @@ -633,7 +633,7 @@ OperationFactory::OperationFactory() param.dilation.height_factor = height_factor; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } else { @@ -644,19 +644,19 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_ADD] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); _map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD]; _map[ANEURALNETWORKS_REDUCE_SUM] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); + 
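
The branches being reindented here implement NNAPI's two calling conventions for convolution-style ops: a short operand list where one input holds a PaddingCode for implicit padding, and a longer list where the four pad amounts are passed explicitly (plus, for CONV_2D, a thirteen-operand form carrying dilation factors). A stand-in sketch of that operand-count dispatch, with the counts taken from the branch comments above; the real generators read concrete indices out of init_param.inputs:

  #include <cstdint>
  #include <stdexcept>

  enum class PaddingScheme { Implicit, Explicit, ExplicitWithDilation };

  // Sketch: CONV_2D selects its parsing branch purely by operand count.
  PaddingScheme selectConvPaddingScheme(uint32_t input_count)
  {
    switch (input_count)
    {
      case 7:  return PaddingScheme::Implicit;             // PaddingCode operand
      case 10: return PaddingScheme::Explicit;             // four pad amounts
      case 13: return PaddingScheme::ExplicitWithDilation; // plus dilation factors
      default: throw std::invalid_argument{"unsupported CONV_2D operand count"};
    }
  }
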
getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM); // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM]; _map[ANEURALNETWORKS_SUB] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -708,7 +708,7 @@ OperationFactory::OperationFactory() param.begin_mask = operands.at(OperandIndex{init_param.inputs[4]}).asScalar<std::int32_t>(); param.end_mask = operands.at(OperandIndex{init_param.inputs[5]}).asScalar<std::int32_t>(); param.shrink_axis_mask = - operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>(); + operands.at(OperandIndex{init_param.inputs[6]}).asScalar<std::int32_t>(); return new operation::StridedSlice{inputs, outputs, param}; }; @@ -716,7 +716,7 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_TRANSPOSE] = createSimpleBinaryOp<operation::Transpose>; _map[ANEURALNETWORKS_MUL] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -758,15 +758,15 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); + onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG); - _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); + _map[ANEURALNETWORKS_LOGISTIC] = + getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); _map[ANEURALNETWORKS_DIV] = - getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP); @@ -780,16 +780,16 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>; _map[ANEURALNETWORKS_GREATER] = - getComparisonGenerator(operation::Comparison::ComparisonType::Greater); + getComparisonGenerator(operation::Comparison::ComparisonType::Greater); _map[ANEURALNETWORKS_GREATER_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less); _map[ANEURALNETWORKS_LESS_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); _map[ANEURALNETWORKS_NOT_EQUAL] = - getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); + getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); _map[ANEURALNETWORKS_EQUAL] = - 
getComparisonGenerator(operation::Comparison::ComparisonType::Equal); + getComparisonGenerator(operation::Comparison::ComparisonType::Equal); // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX @@ -838,13 +838,13 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_ALL] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL); _map[ANEURALNETWORKS_REDUCE_ANY] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ANY); _map[ANEURALNETWORKS_REDUCE_MAX] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX); // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX @@ -873,8 +873,8 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator( - operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); + _map[ANEURALNETWORKS_LOGICAL_AND] = + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX @@ -902,7 +902,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_RSQRT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); _map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -939,8 +939,8 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; _map[ANEURALNETWORKS_RELU] = - getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, - onert::ir::operation::ElementwiseActivation::infinity, 0); + getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, + onert::ir::operation::ElementwiseActivation::infinity, 0); _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -986,10 +986,10 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); + onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator( - onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); + onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); _map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1031,13 +1031,13 @@ OperationFactory::OperationFactory() operation::RNN::Param param; const auto activation_index = OperandIndex{init_param.inputs[5]}; param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); return new operation::RNN{inputs, outputs, param}; }; _map[ANEURALNETWORKS_FLOOR] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); + 
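
Beyond the mechanical reflow, this file documents the factory's shape: _map keys NNAPI operation codes to Generator callables, and families of similar operations (comparisons, reductions, element-wise ops) share parameterized generator builders instead of hand-written lambdas. A reduced sketch of that registration pattern with hypothetical stand-in types, not the real onert IR:

  #include <functional>
  #include <map>

  struct Param { };     // stand-in for OperationFactory::Param
  struct Operands { };  // stand-in for onert::ir::Operands
  struct Operation { virtual ~Operation() = default; };

  enum class ComparisonType { Equal, NotEqual };
  using Generator = std::function<Operation *(const Param &, Operands &)>;

  // One parameterized builder serves every comparison opcode, mirroring
  // getComparisonGenerator(): the opcode-specific detail is captured once.
  Generator makeComparisonGenerator(ComparisonType type)
  {
    return [type](const Param &, Operands &) -> Operation * {
      (void)type; // the real factory stores this in the operation's Param
      return new Operation{};
    };
  }

  int main()
  {
    std::map<int, Generator> registry;
    registry[0 /* stands in for ANEURALNETWORKS_EQUAL */] =
      makeComparisonGenerator(ComparisonType::Equal);
    Param p;
    Operands ops;
    delete registry[0](p, ops); // the factory hands ownership to the caller
  }
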
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1169,21 +1169,21 @@ OperationFactory::OperationFactory() const auto vstride_index = OperandIndex{init_param.inputs[5]}; param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); return new operation::TransposeConv{inputs, outputs, param}; }; _map[ANEURALNETWORKS_SQRT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); // ANEURALNETWORKS_SQRT_EX is deprecated // TODO Remove ANEURALNETWORKS_SQRT_EX _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; - _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator( - operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); + _map[ANEURALNETWORKS_LOGICAL_OR] = + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX @@ -1211,7 +1211,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_LOGICAL_NOT] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX @@ -1370,9 +1370,9 @@ OperationFactory::OperationFactory() // 2 -> Cell State Out Tensor Index const OperandIndex scratch_buffer_index; OperandIndex output_state_index = - init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex(); + init_param.output_count >= 2 ? OperandIndex{init_param.outputs[1]} : OperandIndex(); OperandIndex cell_state_index = - init_param.output_count >= 3 ? OperandIndex{init_param.outputs[2]} : OperandIndex(); + init_param.output_count >= 3 ? 
OperandIndex{init_param.outputs[2]} : OperandIndex(); const OperandIndex output_index = OperandIndex{init_param.outputs[0]}; OperandIndexSequence outputs{scratch_buffer_index, output_state_index, cell_state_index, output_index}; @@ -1519,19 +1519,39 @@ OperationFactory::OperationFactory() // 1 -> Axis Tensor Index OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - operation::ArgMax::Param param; + operation::ArgMinMax::Param param; // NNAPI ARGMAX output type is always int32 param.output_type = DataType::INT32; + param.is_arg_max = true; - return new operation::ArgMax{inputs, outputs, param}; + return new operation::ArgMinMax{inputs, outputs, param}; }; // ANEURALNETWORKS_ARGMAX_EX is deprecated // TODO Remove ANEURALNETWORKS_ARGMAX_EX _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX]; + _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2 && init_param.output_count == 1); + + OperandIndexSequence outputs{init_param.outputs[0]}; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + + operation::ArgMinMax::Param param; + // NNAPI ARGMIN output type is always int32 + param.output_type = DataType::INT32; + param.is_arg_max = false; + + return new operation::ArgMinMax{inputs, outputs, param}; + }; + _map[ANEURALNETWORKS_DEQUANTIZE] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1608,7 +1628,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_MIN] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN); // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX @@ -1689,10 +1709,10 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD]; _map[ANEURALNETWORKS_MINIMUM] = - getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN); + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN); _map[ANEURALNETWORKS_MAXIMUM] = - getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX); + getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX); _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -1719,7 +1739,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_COS_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS); _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN); @@ -1733,10 +1753,10 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_REDUCE_PROD] = - getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD); + getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD); _map[ANEURALNETWORKS_ROUND_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND); + 
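
The ARGMAX/ARGMIN change above is the one behavioral refactor in this file: the former operation::ArgMax node is replaced by a shared operation::ArgMinMax whose Param carries an is_arg_max flag, so both NNAPI codes build the same node and differ only in that flag, with the output type pinned to INT32 for both as NNAPI requires. A condensed sketch of the shared parameter setup, using stand-in types rather than the real IR:

  enum class DataType { INT32 }; // stand-in for onert::ir::DataType

  struct ArgMinMaxParam
  {
    DataType output_type;
    bool is_arg_max;
  };

  // Both NNAPI entry points reduce to one construction path; only the flag
  // differs, exactly as in the two generators registered above.
  ArgMinMaxParam makeArgMinMaxParam(bool is_arg_max)
  {
    ArgMinMaxParam param;
    param.output_type = DataType::INT32; // NNAPI ARGMAX/ARGMIN output is always int32
    param.is_arg_max = is_arg_max;
    return param;
  }
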
getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND); _map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -1764,7 +1784,7 @@ OperationFactory::OperationFactory() _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>; _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE); // Each input should be interpreted as follows: // 0 -> Input Tensor Index // 1 -> Multiple Tensor Index @@ -1904,7 +1924,7 @@ OperationFactory::OperationFactory() }; _map[ANEURALNETWORKS_QUANTIZE] = - getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE); + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE); } Operation *OperationFactory::create(ANeuralNetworksOperationType type, diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h index 367cf74db..74e187421 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.h @@ -40,7 +40,7 @@ public: public: using Generator = - std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>; + std::function<onert::ir::Operation *(const OperationFactory::Param &, onert::ir::Operands &)>; public: static OperationFactory &get(); diff --git a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h index c6e9147cd..8e1b84e29 100644 --- a/runtime/onert/frontend/tflite/src/tflite_schema_generated.h +++ b/runtime/onert/frontend/tflite/src/tflite_schema_generated.h @@ -1710,9 +1710,8 @@ enum ActivationFunctionType inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static const ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, ActivationFunctionType_RELU, ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; return values; } @@ -1768,8 +1767,8 @@ enum FullyConnectedOptionsWeightsFormat inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] { static const FullyConnectedOptionsWeightsFormat values[] = { - FullyConnectedOptionsWeightsFormat_DEFAULT, - FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8}; return values; } @@ -1981,8 +1980,8 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab const CustomQuantization *details_as_CustomQuantization() const { return details_type() == QuantizationDetails_CustomQuantization - ? static_cast<const CustomQuantization *>(details()) - : nullptr; + ? 
static_cast<const CustomQuantization *>(details()) + : nullptr; } int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); } bool Verify(flatbuffers::Verifier &verifier) const @@ -2072,17 +2071,17 @@ CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, - const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, - const std::vector<int64_t> *zero_point = nullptr, - QuantizationDetails details_type = QuantizationDetails_NONE, - flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr, + const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr, + const std::vector<int64_t> *zero_point = nullptr, + QuantizationDetails details_type = QuantizationDetails_NONE, + flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0) { return onert_tflite::CreateQuantizationParameters( - _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, - scale ? _fbb.CreateVector<float>(*scale) : 0, - zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details, - quantized_dimension); + _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0, + scale ? _fbb.CreateVector<float>(*scale) : 0, + zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details, + quantized_dimension); } struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2272,20 +2271,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Int32Vector *array_segments_as_Int32Vector() const { return array_segments_type() == SparseIndexVector_Int32Vector - ? static_cast<const Int32Vector *>(array_segments()) - : nullptr; + ? static_cast<const Int32Vector *>(array_segments()) + : nullptr; } const Uint16Vector *array_segments_as_Uint16Vector() const { return array_segments_type() == SparseIndexVector_Uint16Vector - ? static_cast<const Uint16Vector *>(array_segments()) - : nullptr; + ? static_cast<const Uint16Vector *>(array_segments()) + : nullptr; } const Uint8Vector *array_segments_as_Uint8Vector() const { return array_segments_type() == SparseIndexVector_Uint8Vector - ? static_cast<const Uint8Vector *>(array_segments()) - : nullptr; + ? static_cast<const Uint8Vector *>(array_segments()) + : nullptr; } SparseIndexVector array_indices_type() const { @@ -2296,20 +2295,20 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Int32Vector *array_indices_as_Int32Vector() const { return array_indices_type() == SparseIndexVector_Int32Vector - ? static_cast<const Int32Vector *>(array_indices()) - : nullptr; + ? static_cast<const Int32Vector *>(array_indices()) + : nullptr; } const Uint16Vector *array_indices_as_Uint16Vector() const { return array_indices_type() == SparseIndexVector_Uint16Vector - ? static_cast<const Uint16Vector *>(array_indices()) - : nullptr; + ? static_cast<const Uint16Vector *>(array_indices()) + : nullptr; } const Uint8Vector *array_indices_as_Uint8Vector() const { return array_indices_type() == SparseIndexVector_Uint8Vector - ? static_cast<const Uint8Vector *>(array_indices()) - : nullptr; + ? 
static_cast<const Uint8Vector *>(array_indices()) + : nullptr; } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2435,7 +2434,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>( - VT_DIM_METADATA); + VT_DIM_METADATA); } bool Verify(flatbuffers::Verifier &verifier) const { @@ -2460,7 +2459,7 @@ struct SparsityParametersBuilder fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); } void add_dim_metadata( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata) { fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); } @@ -2478,11 +2477,10 @@ struct SparsityParametersBuilder }; inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = - 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0) { SparsityParametersBuilder builder_(_fbb); builder_.add_dim_metadata(dim_metadata); @@ -2492,14 +2490,14 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters( } inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, - const std::vector<int32_t> *block_map = nullptr, - const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr, + const std::vector<int32_t> *block_map = nullptr, + const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr) { return onert_tflite::CreateSparsityParameters( - _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0, - block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0, - dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0); + _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0, + block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0, + dim_metadata ? 
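
DimensionMetadata stores its segment and index arrays behind the SparseIndexVector union, so a reader has to probe the three width-specific accessors shown above. A sketch of widening whichever variant is present to int32_t; it assumes the generated Int32Vector/Uint16Vector/Uint8Vector tables expose their payload via values(), as in the upstream TFLite schema:

  #include <cstdint>
  #include "tflite_schema_generated.h"

  // Hypothetical reader: fetch array_segments[i] regardless of the storage
  // width chosen by the converter. Each accessor returns nullptr unless its
  // union tag matches, so at most one branch is taken.
  int32_t segmentAt(const onert_tflite::DimensionMetadata *dm, flatbuffers::uoffset_t i)
  {
    if (const auto *v = dm->array_segments_as_Int32Vector())
      return v->values()->Get(i);
    if (const auto *v = dm->array_segments_as_Uint16Vector())
      return static_cast<int32_t>(v->values()->Get(i));
    if (const auto *v = dm->array_segments_as_Uint8Vector())
      return static_cast<int32_t>(v->values()->Get(i));
    return -1; // no segments stored
  }
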
_fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0); } struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2619,16 +2617,16 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, } inline flatbuffers::Offset<Tensor> CreateTensorDirect( - flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, - flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false, - flatbuffers::Offset<SparsityParameters> sparsity = 0, - const std::vector<int32_t> *shape_signature = nullptr) + flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr, + TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr, + flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false, + flatbuffers::Offset<SparsityParameters> sparsity = 0, + const std::vector<int32_t> *shape_signature = nullptr) { return onert_tflite::CreateTensor( - _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer, - name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, - shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0); + _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer, + name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity, + shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0); } struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -2890,10 +2888,10 @@ struct DepthwiseConv2DOptionsBuilder }; inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, - int32_t stride_h = 0, int32_t depth_multiplier = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) + flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0, + int32_t stride_h = 0, int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_dilation_h_factor(dilation_h_factor); @@ -2942,12 +2940,12 @@ struct ConcatEmbeddingsOptionsBuilder fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } void add_num_columns_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } void add_embedding_dim_per_channel( - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) { fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); @@ -2966,9 +2964,9 @@ struct ConcatEmbeddingsOptionsBuilder }; inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) + flatbuffers::FlatBufferBuilder 
&_fbb, int32_t num_channels = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); @@ -2983,9 +2981,9 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_ const std::vector<int32_t> *embedding_dim_per_channel = nullptr) { return onert_tflite::CreateConcatEmbeddingsOptions( - _fbb, num_channels, - num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, - embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); + _fbb, num_channels, + num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0); } struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -3219,9 +3217,9 @@ struct SequenceRNNOptionsBuilder }; inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3296,9 +3294,9 @@ struct BidirectionalSequenceRNNOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - bool merge_outputs = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + bool merge_outputs = false, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3378,10 +3376,10 @@ struct FullyConnectedOptionsBuilder }; inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, - bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, bool asymmetric_quantize_inputs = false) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); @@ -3474,8 +3472,8 @@ struct ConcatenationOptionsBuilder }; inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) + flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, + 
ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); @@ -3669,7 +3667,7 @@ struct LocalResponseNormalizationOptionsBuilder fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3845,7 +3843,7 @@ struct UnidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3861,10 +3859,10 @@ struct UnidirectionalSequenceLSTMOptionsBuilder inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, - bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false, + bool asymmetric_quantize_inputs = false) { UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -3943,7 +3941,7 @@ struct BidirectionalSequenceLSTMOptionsBuilder static_cast<uint8_t>(asymmetric_quantize_inputs), 0); } explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } @@ -3958,10 +3956,10 @@ struct BidirectionalSequenceLSTMOptionsBuilder }; inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, - bool time_major = true, bool asymmetric_quantize_inputs = false) + flatbuffers::FlatBufferBuilder &_fbb, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false, + bool time_major = true, bool asymmetric_quantize_inputs = false) { BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); @@ -4844,7 +4842,7 @@ CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *squeeze_dims = nullptr) { return onert_tflite::CreateSqueezeOptions( - _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); + _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0); } struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -7206,7 +7204,7 @@ CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *custom_code = nullptr, int32_t version = 1) { return onert_tflite::CreateOperatorCode( - _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version); + _fbb, builtin_code, custom_code ? 
_fbb.CreateString(custom_code) : 0, version); } struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -7241,611 +7239,611 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const Conv2DOptions *builtin_options_as_Conv2DOptions() const { return builtin_options_type() == BuiltinOptions_Conv2DOptions - ? static_cast<const Conv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const Conv2DOptions *>(builtin_options()) + : nullptr; } const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions - ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DepthwiseConv2DOptions *>(builtin_options()) + : nullptr; } const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options()) + : nullptr; } const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { return builtin_options_type() == BuiltinOptions_LSHProjectionOptions - ? static_cast<const LSHProjectionOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LSHProjectionOptions *>(builtin_options()) + : nullptr; } const Pool2DOptions *builtin_options_as_Pool2DOptions() const { return builtin_options_type() == BuiltinOptions_Pool2DOptions - ? static_cast<const Pool2DOptions *>(builtin_options()) - : nullptr; + ? static_cast<const Pool2DOptions *>(builtin_options()) + : nullptr; } const SVDFOptions *builtin_options_as_SVDFOptions() const { return builtin_options_type() == BuiltinOptions_SVDFOptions - ? static_cast<const SVDFOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SVDFOptions *>(builtin_options()) + : nullptr; } const RNNOptions *builtin_options_as_RNNOptions() const { return builtin_options_type() == BuiltinOptions_RNNOptions - ? static_cast<const RNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const RNNOptions *>(builtin_options()) + : nullptr; } const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { return builtin_options_type() == BuiltinOptions_FullyConnectedOptions - ? static_cast<const FullyConnectedOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FullyConnectedOptions *>(builtin_options()) + : nullptr; } const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_SoftmaxOptions - ? static_cast<const SoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SoftmaxOptions *>(builtin_options()) + : nullptr; } const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { return builtin_options_type() == BuiltinOptions_ConcatenationOptions - ? static_cast<const ConcatenationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ConcatenationOptions *>(builtin_options()) + : nullptr; } const AddOptions *builtin_options_as_AddOptions() const { return builtin_options_type() == BuiltinOptions_AddOptions - ? static_cast<const AddOptions *>(builtin_options()) - : nullptr; + ? static_cast<const AddOptions *>(builtin_options()) + : nullptr; } const L2NormOptions *builtin_options_as_L2NormOptions() const { return builtin_options_type() == BuiltinOptions_L2NormOptions - ? static_cast<const L2NormOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const L2NormOptions *>(builtin_options()) + : nullptr; } const LocalResponseNormalizationOptions * builtin_options_as_LocalResponseNormalizationOptions() const { return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options()) + : nullptr; } const LSTMOptions *builtin_options_as_LSTMOptions() const { return builtin_options_type() == BuiltinOptions_LSTMOptions - ? static_cast<const LSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LSTMOptions *>(builtin_options()) + : nullptr; } const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions - ? static_cast<const ResizeBilinearOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ResizeBilinearOptions *>(builtin_options()) + : nullptr; } const CallOptions *builtin_options_as_CallOptions() const { return builtin_options_type() == BuiltinOptions_CallOptions - ? static_cast<const CallOptions *>(builtin_options()) - : nullptr; + ? static_cast<const CallOptions *>(builtin_options()) + : nullptr; } const ReshapeOptions *builtin_options_as_ReshapeOptions() const { return builtin_options_type() == BuiltinOptions_ReshapeOptions - ? static_cast<const ReshapeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ReshapeOptions *>(builtin_options()) + : nullptr; } const SkipGramOptions *builtin_options_as_SkipGramOptions() const { return builtin_options_type() == BuiltinOptions_SkipGramOptions - ? static_cast<const SkipGramOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SkipGramOptions *>(builtin_options()) + : nullptr; } const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions - ? static_cast<const SpaceToDepthOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SpaceToDepthOptions *>(builtin_options()) + : nullptr; } const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options()) + : nullptr; } const MulOptions *builtin_options_as_MulOptions() const { return builtin_options_type() == BuiltinOptions_MulOptions - ? static_cast<const MulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MulOptions *>(builtin_options()) + : nullptr; } const PadOptions *builtin_options_as_PadOptions() const { return builtin_options_type() == BuiltinOptions_PadOptions - ? static_cast<const PadOptions *>(builtin_options()) - : nullptr; + ? static_cast<const PadOptions *>(builtin_options()) + : nullptr; } const GatherOptions *builtin_options_as_GatherOptions() const { return builtin_options_type() == BuiltinOptions_GatherOptions - ? static_cast<const GatherOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GatherOptions *>(builtin_options()) + : nullptr; } const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions - ? static_cast<const BatchToSpaceNDOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const BatchToSpaceNDOptions *>(builtin_options()) + : nullptr; } const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions - ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SpaceToBatchNDOptions *>(builtin_options()) + : nullptr; } const TransposeOptions *builtin_options_as_TransposeOptions() const { return builtin_options_type() == BuiltinOptions_TransposeOptions - ? static_cast<const TransposeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const TransposeOptions *>(builtin_options()) + : nullptr; } const ReducerOptions *builtin_options_as_ReducerOptions() const { return builtin_options_type() == BuiltinOptions_ReducerOptions - ? static_cast<const ReducerOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ReducerOptions *>(builtin_options()) + : nullptr; } const SubOptions *builtin_options_as_SubOptions() const { return builtin_options_type() == BuiltinOptions_SubOptions - ? static_cast<const SubOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SubOptions *>(builtin_options()) + : nullptr; } const DivOptions *builtin_options_as_DivOptions() const { return builtin_options_type() == BuiltinOptions_DivOptions - ? static_cast<const DivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DivOptions *>(builtin_options()) + : nullptr; } const SqueezeOptions *builtin_options_as_SqueezeOptions() const { return builtin_options_type() == BuiltinOptions_SqueezeOptions - ? static_cast<const SqueezeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SqueezeOptions *>(builtin_options()) + : nullptr; } const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_SequenceRNNOptions - ? static_cast<const SequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SequenceRNNOptions *>(builtin_options()) + : nullptr; } const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { return builtin_options_type() == BuiltinOptions_StridedSliceOptions - ? static_cast<const StridedSliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const StridedSliceOptions *>(builtin_options()) + : nullptr; } const ExpOptions *builtin_options_as_ExpOptions() const { return builtin_options_type() == BuiltinOptions_ExpOptions - ? static_cast<const ExpOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ExpOptions *>(builtin_options()) + : nullptr; } const TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == BuiltinOptions_TopKV2Options - ? static_cast<const TopKV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const TopKV2Options *>(builtin_options()) + : nullptr; } const SplitOptions *builtin_options_as_SplitOptions() const { return builtin_options_type() == BuiltinOptions_SplitOptions - ? static_cast<const SplitOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SplitOptions *>(builtin_options()) + : nullptr; } const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions - ? static_cast<const LogSoftmaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogSoftmaxOptions *>(builtin_options()) + : nullptr; } const CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == BuiltinOptions_CastOptions - ? 
static_cast<const CastOptions *>(builtin_options()) - : nullptr; + ? static_cast<const CastOptions *>(builtin_options()) + : nullptr; } const DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == BuiltinOptions_DequantizeOptions - ? static_cast<const DequantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DequantizeOptions *>(builtin_options()) + : nullptr; } const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions - ? static_cast<const MaximumMinimumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MaximumMinimumOptions *>(builtin_options()) + : nullptr; } const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { return builtin_options_type() == BuiltinOptions_ArgMaxOptions - ? static_cast<const ArgMaxOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ArgMaxOptions *>(builtin_options()) + : nullptr; } const LessOptions *builtin_options_as_LessOptions() const { return builtin_options_type() == BuiltinOptions_LessOptions - ? static_cast<const LessOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LessOptions *>(builtin_options()) + : nullptr; } const NegOptions *builtin_options_as_NegOptions() const { return builtin_options_type() == BuiltinOptions_NegOptions - ? static_cast<const NegOptions *>(builtin_options()) - : nullptr; + ? static_cast<const NegOptions *>(builtin_options()) + : nullptr; } const PadV2Options *builtin_options_as_PadV2Options() const { return builtin_options_type() == BuiltinOptions_PadV2Options - ? static_cast<const PadV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const PadV2Options *>(builtin_options()) + : nullptr; } const GreaterOptions *builtin_options_as_GreaterOptions() const { return builtin_options_type() == BuiltinOptions_GreaterOptions - ? static_cast<const GreaterOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GreaterOptions *>(builtin_options()) + : nullptr; } const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { return builtin_options_type() == BuiltinOptions_GreaterEqualOptions - ? static_cast<const GreaterEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GreaterEqualOptions *>(builtin_options()) + : nullptr; } const LessEqualOptions *builtin_options_as_LessEqualOptions() const { return builtin_options_type() == BuiltinOptions_LessEqualOptions - ? static_cast<const LessEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LessEqualOptions *>(builtin_options()) + : nullptr; } const SelectOptions *builtin_options_as_SelectOptions() const { return builtin_options_type() == BuiltinOptions_SelectOptions - ? static_cast<const SelectOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SelectOptions *>(builtin_options()) + : nullptr; } const SliceOptions *builtin_options_as_SliceOptions() const { return builtin_options_type() == BuiltinOptions_SliceOptions - ? static_cast<const SliceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SliceOptions *>(builtin_options()) + : nullptr; } const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { return builtin_options_type() == BuiltinOptions_TransposeConvOptions - ? static_cast<const TransposeConvOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const TransposeConvOptions *>(builtin_options()) + : nullptr; } const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { return builtin_options_type() == BuiltinOptions_SparseToDenseOptions - ? static_cast<const SparseToDenseOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SparseToDenseOptions *>(builtin_options()) + : nullptr; } const TileOptions *builtin_options_as_TileOptions() const { return builtin_options_type() == BuiltinOptions_TileOptions - ? static_cast<const TileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const TileOptions *>(builtin_options()) + : nullptr; } const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { return builtin_options_type() == BuiltinOptions_ExpandDimsOptions - ? static_cast<const ExpandDimsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ExpandDimsOptions *>(builtin_options()) + : nullptr; } const EqualOptions *builtin_options_as_EqualOptions() const { return builtin_options_type() == BuiltinOptions_EqualOptions - ? static_cast<const EqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const EqualOptions *>(builtin_options()) + : nullptr; } const NotEqualOptions *builtin_options_as_NotEqualOptions() const { return builtin_options_type() == BuiltinOptions_NotEqualOptions - ? static_cast<const NotEqualOptions *>(builtin_options()) - : nullptr; + ? static_cast<const NotEqualOptions *>(builtin_options()) + : nullptr; } const ShapeOptions *builtin_options_as_ShapeOptions() const { return builtin_options_type() == BuiltinOptions_ShapeOptions - ? static_cast<const ShapeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ShapeOptions *>(builtin_options()) + : nullptr; } const PowOptions *builtin_options_as_PowOptions() const { return builtin_options_type() == BuiltinOptions_PowOptions - ? static_cast<const PowOptions *>(builtin_options()) - : nullptr; + ? static_cast<const PowOptions *>(builtin_options()) + : nullptr; } const ArgMinOptions *builtin_options_as_ArgMinOptions() const { return builtin_options_type() == BuiltinOptions_ArgMinOptions - ? static_cast<const ArgMinOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ArgMinOptions *>(builtin_options()) + : nullptr; } const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { return builtin_options_type() == BuiltinOptions_FakeQuantOptions - ? static_cast<const FakeQuantOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FakeQuantOptions *>(builtin_options()) + : nullptr; } const PackOptions *builtin_options_as_PackOptions() const { return builtin_options_type() == BuiltinOptions_PackOptions - ? static_cast<const PackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const PackOptions *>(builtin_options()) + : nullptr; } const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { return builtin_options_type() == BuiltinOptions_LogicalOrOptions - ? static_cast<const LogicalOrOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogicalOrOptions *>(builtin_options()) + : nullptr; } const OneHotOptions *builtin_options_as_OneHotOptions() const { return builtin_options_type() == BuiltinOptions_OneHotOptions - ? static_cast<const OneHotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const OneHotOptions *>(builtin_options()) + : nullptr; } const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { return builtin_options_type() == BuiltinOptions_LogicalAndOptions - ? 
static_cast<const LogicalAndOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogicalAndOptions *>(builtin_options()) + : nullptr; } const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { return builtin_options_type() == BuiltinOptions_LogicalNotOptions - ? static_cast<const LogicalNotOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LogicalNotOptions *>(builtin_options()) + : nullptr; } const UnpackOptions *builtin_options_as_UnpackOptions() const { return builtin_options_type() == BuiltinOptions_UnpackOptions - ? static_cast<const UnpackOptions *>(builtin_options()) - : nullptr; + ? static_cast<const UnpackOptions *>(builtin_options()) + : nullptr; } const FloorDivOptions *builtin_options_as_FloorDivOptions() const { return builtin_options_type() == BuiltinOptions_FloorDivOptions - ? static_cast<const FloorDivOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FloorDivOptions *>(builtin_options()) + : nullptr; } const SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == BuiltinOptions_SquareOptions - ? static_cast<const SquareOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SquareOptions *>(builtin_options()) + : nullptr; } const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == BuiltinOptions_ZerosLikeOptions - ? static_cast<const ZerosLikeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ZerosLikeOptions *>(builtin_options()) + : nullptr; } const FillOptions *builtin_options_as_FillOptions() const { return builtin_options_type() == BuiltinOptions_FillOptions - ? static_cast<const FillOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FillOptions *>(builtin_options()) + : nullptr; } const BidirectionalSequenceLSTMOptions * builtin_options_as_BidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions - ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions - ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options()) + : nullptr; } const UnidirectionalSequenceLSTMOptions * builtin_options_as_UnidirectionalSequenceLSTMOptions() const { return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions - ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) - : nullptr; + ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options()) + : nullptr; } const FloorModOptions *builtin_options_as_FloorModOptions() const { return builtin_options_type() == BuiltinOptions_FloorModOptions - ? static_cast<const FloorModOptions *>(builtin_options()) - : nullptr; + ? static_cast<const FloorModOptions *>(builtin_options()) + : nullptr; } const RangeOptions *builtin_options_as_RangeOptions() const { return builtin_options_type() == BuiltinOptions_RangeOptions - ? static_cast<const RangeOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const RangeOptions *>(builtin_options()) + : nullptr; } const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const { return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions - ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options()) + : nullptr; } const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { return builtin_options_type() == BuiltinOptions_LeakyReluOptions - ? static_cast<const LeakyReluOptions *>(builtin_options()) - : nullptr; + ? static_cast<const LeakyReluOptions *>(builtin_options()) + : nullptr; } const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions - ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SquaredDifferenceOptions *>(builtin_options()) + : nullptr; } const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { return builtin_options_type() == BuiltinOptions_MirrorPadOptions - ? static_cast<const MirrorPadOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MirrorPadOptions *>(builtin_options()) + : nullptr; } const AbsOptions *builtin_options_as_AbsOptions() const { return builtin_options_type() == BuiltinOptions_AbsOptions - ? static_cast<const AbsOptions *>(builtin_options()) - : nullptr; + ? static_cast<const AbsOptions *>(builtin_options()) + : nullptr; } const SplitVOptions *builtin_options_as_SplitVOptions() const { return builtin_options_type() == BuiltinOptions_SplitVOptions - ? static_cast<const SplitVOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SplitVOptions *>(builtin_options()) + : nullptr; } const UniqueOptions *builtin_options_as_UniqueOptions() const { return builtin_options_type() == BuiltinOptions_UniqueOptions - ? static_cast<const UniqueOptions *>(builtin_options()) - : nullptr; + ? static_cast<const UniqueOptions *>(builtin_options()) + : nullptr; } const ReverseV2Options *builtin_options_as_ReverseV2Options() const { return builtin_options_type() == BuiltinOptions_ReverseV2Options - ? static_cast<const ReverseV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const ReverseV2Options *>(builtin_options()) + : nullptr; } const AddNOptions *builtin_options_as_AddNOptions() const { return builtin_options_type() == BuiltinOptions_AddNOptions - ? static_cast<const AddNOptions *>(builtin_options()) - : nullptr; + ? static_cast<const AddNOptions *>(builtin_options()) + : nullptr; } const GatherNdOptions *builtin_options_as_GatherNdOptions() const { return builtin_options_type() == BuiltinOptions_GatherNdOptions - ? static_cast<const GatherNdOptions *>(builtin_options()) - : nullptr; + ? static_cast<const GatherNdOptions *>(builtin_options()) + : nullptr; } const CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == BuiltinOptions_CosOptions - ? static_cast<const CosOptions *>(builtin_options()) - : nullptr; + ? static_cast<const CosOptions *>(builtin_options()) + : nullptr; } const WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == BuiltinOptions_WhereOptions - ? static_cast<const WhereOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const WhereOptions *>(builtin_options()) + : nullptr; } const RankOptions *builtin_options_as_RankOptions() const { return builtin_options_type() == BuiltinOptions_RankOptions - ? static_cast<const RankOptions *>(builtin_options()) - : nullptr; + ? static_cast<const RankOptions *>(builtin_options()) + : nullptr; } const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions - ? static_cast<const ReverseSequenceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const ReverseSequenceOptions *>(builtin_options()) + : nullptr; } const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { return builtin_options_type() == BuiltinOptions_MatrixDiagOptions - ? static_cast<const MatrixDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MatrixDiagOptions *>(builtin_options()) + : nullptr; } const QuantizeOptions *builtin_options_as_QuantizeOptions() const { return builtin_options_type() == BuiltinOptions_QuantizeOptions - ? static_cast<const QuantizeOptions *>(builtin_options()) - : nullptr; + ? static_cast<const QuantizeOptions *>(builtin_options()) + : nullptr; } const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions - ? static_cast<const MatrixSetDiagOptions *>(builtin_options()) - : nullptr; + ? static_cast<const MatrixSetDiagOptions *>(builtin_options()) + : nullptr; } const HardSwishOptions *builtin_options_as_HardSwishOptions() const { return builtin_options_type() == BuiltinOptions_HardSwishOptions - ? static_cast<const HardSwishOptions *>(builtin_options()) - : nullptr; + ? static_cast<const HardSwishOptions *>(builtin_options()) + : nullptr; } const IfOptions *builtin_options_as_IfOptions() const { return builtin_options_type() == BuiltinOptions_IfOptions - ? static_cast<const IfOptions *>(builtin_options()) - : nullptr; + ? static_cast<const IfOptions *>(builtin_options()) + : nullptr; } const WhileOptions *builtin_options_as_WhileOptions() const { return builtin_options_type() == BuiltinOptions_WhileOptions - ? static_cast<const WhileOptions *>(builtin_options()) - : nullptr; + ? static_cast<const WhileOptions *>(builtin_options()) + : nullptr; } const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions - ? static_cast<const DepthToSpaceOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DepthToSpaceOptions *>(builtin_options()) + : nullptr; } const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options - ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options()) - : nullptr; + ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options()) + : nullptr; } const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options - ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options()) - : nullptr; + ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options()) + : nullptr; } const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { return builtin_options_type() == BuiltinOptions_ScatterNdOptions - ? static_cast<const ScatterNdOptions *>(builtin_options()) - : nullptr; + ? 
static_cast<const ScatterNdOptions *>(builtin_options()) + : nullptr; } const SelectV2Options *builtin_options_as_SelectV2Options() const { return builtin_options_type() == BuiltinOptions_SelectV2Options - ? static_cast<const SelectV2Options *>(builtin_options()) - : nullptr; + ? static_cast<const SelectV2Options *>(builtin_options()) + : nullptr; } const DensifyOptions *builtin_options_as_DensifyOptions() const { return builtin_options_type() == BuiltinOptions_DensifyOptions - ? static_cast<const DensifyOptions *>(builtin_options()) - : nullptr; + ? static_cast<const DensifyOptions *>(builtin_options()) + : nullptr; } const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { return builtin_options_type() == BuiltinOptions_SegmentSumOptions - ? static_cast<const SegmentSumOptions *>(builtin_options()) - : nullptr; + ? static_cast<const SegmentSumOptions *>(builtin_options()) + : nullptr; } const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == BuiltinOptions_BatchMatMulOptions - ? static_cast<const BatchMatMulOptions *>(builtin_options()) - : nullptr; + ? static_cast<const BatchMatMulOptions *>(builtin_options()) + : nullptr; } const flatbuffers::Vector<uint8_t> *custom_options() const { @@ -8457,7 +8455,7 @@ struct OperatorBuilder static_cast<int8_t>(custom_options_format), 0); } void add_mutating_variable_inputs( - flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) + flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) { fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); } @@ -8514,11 +8512,11 @@ CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index const std::vector<int32_t> *intermediates = nullptr) { return onert_tflite::CreateOperator( - _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, - outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, - custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, - mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0, - intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0); + _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options, + custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format, + mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0, + intermediates ? 
_fbb.CreateVector<int32_t>(*intermediates) : 0); } struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8602,12 +8600,12 @@ struct SubGraphBuilder }; inline flatbuffers::Offset<SubGraph> CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, - flatbuffers::Offset<flatbuffers::String> name = 0) + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0, + flatbuffers::Offset<flatbuffers::String> name = 0) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -8618,20 +8616,18 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph( return builder_.Finish(); } -inline flatbuffers::Offset<SubGraph> -CreateSubGraphDirect(flatbuffers::FlatBufferBuilder &_fbb, - const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, - const std::vector<int32_t> *inputs = nullptr, - const std::vector<int32_t> *outputs = nullptr, - const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, - const char *name = nullptr) +inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr, + const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr, + const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr) { return onert_tflite::CreateSubGraph( - _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, - inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, - outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, - operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, - name ? _fbb.CreateString(name) : 0); + _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0, + inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0, + outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, + operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0, + name ? 
_fbb.CreateString(name) : 0); } struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table @@ -8762,7 +8758,7 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>( - VT_OPERATOR_CODES); + VT_OPERATOR_CODES); } const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const { @@ -8805,7 +8801,7 @@ struct ModelBuilder flatbuffers::uoffset_t start_; void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); } void add_operator_codes( - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } @@ -8845,13 +8841,13 @@ struct ModelBuilder }; inline flatbuffers::Offset<Model> CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, - flatbuffers::Offset<flatbuffers::String> description = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, - flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, - flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0) + flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0, + flatbuffers::Offset<flatbuffers::String> description = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0, + flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0, + flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0) { ModelBuilder builder_(_fbb); builder_.add_metadata(metadata); @@ -8874,13 +8870,13 @@ CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr) { return onert_tflite::CreateModel( - _fbb, version, - operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, - subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, - description ? _fbb.CreateString(description) : 0, - buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, - metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0, - metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0); + _fbb, version, + operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0, + description ? _fbb.CreateString(description) : 0, + buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0, + metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0, + metadata ? 
_fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0); } inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, diff --git a/runtime/onert/sample/.clang-format b/runtime/onert/sample/.clang-format new file mode 120000 index 000000000..83185fee3 --- /dev/null +++ b/runtime/onert/sample/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8
\ No newline at end of file diff --git a/runtime/onert/test/.clang-format b/runtime/onert/test/.clang-format new file mode 120000 index 000000000..83185fee3 --- /dev/null +++ b/runtime/onert/test/.clang-format @@ -0,0 +1 @@ +../../../.clang-format.8
\ No newline at end of file diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc index 50f3964db..c77ebb895 100644 --- a/runtime/onert/test/core/compiler/Scheduler.cc +++ b/runtime/onert/test/core/compiler/HEScheduler.cc @@ -55,8 +55,7 @@ struct MockBackendCPU : public Backend std::unique_ptr<BackendContext> newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override { - return std::unique_ptr<BackendContext>( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr}); } }; @@ -79,8 +78,7 @@ struct MockBackendGPU : public Backend std::unique_ptr<BackendContext> newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override { - return std::unique_ptr<BackendContext>( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr}); } }; @@ -103,8 +101,7 @@ struct MockBackendNPU : public Backend std::unique_ptr<BackendContext> newContext(const Graph &, const std::shared_ptr<custom::IKernelBuilder> &, bool) const override { - return std::unique_ptr<BackendContext>( - new BackendContext{this, nullptr, nullptr, nullptr, nullptr}); + return std::unique_ptr<BackendContext>(new BackendContext{this, nullptr}); } }; @@ -165,7 +162,7 @@ void setOperationsExecutionTime(const std::vector<const Backend *> &backends, for (auto &backend : backends) setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time); } - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // Set permute time from one backend to another. This method is needed since ExecutionTime has only @@ -195,7 +192,7 @@ void setPermutationsExecutionTime(const std::vector<const Backend *> &backends, setPermutationTime(et, backend, other_backend, false, operand_size, exec_time); } } - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // @@ -304,7 +301,7 @@ std::shared_ptr<Graph> createBranchedGraph() // // SetUp/TearDown methods runs before/after each test and performs actions common for each test -class SchedulerTest : public ::testing::Test +class HESchedulerTest : public ::testing::Test { protected: void SetUp() override @@ -359,8 +356,8 @@ protected: std::string _original_profiling_mode; }; -class SchedulerTestWithExecutorParam : public SchedulerTest, - public testing::WithParamInterface<std::string> +class HESchedulerTestWithExecutorParam : public HESchedulerTest, + public testing::WithParamInterface<std::string> { }; @@ -369,7 +366,7 @@ class SchedulerTestWithExecutorParam : public SchedulerTest, // // Test scheduler behavior for straight graph with known execution time of all nodes and permutes. 
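(For context: the hunks in this test file exercise two renames introduced by this import, the HESchedulerTest* fixtures and ExecTime::storeOperationsExecTime(), which replaces uploadOperationsExecTime(). Below is a minimal sketch of the record-then-persist pattern these tests rely on; the include path, namespaces, and backend setup are assumptions, while the call signatures and the exec_time.json side effect are taken from the hunks themselves.)

#include <vector>
#include "exec/ExecTime.h" // include path assumed

// Record measured execution times for an op on one backend, then persist them
// to exec_time.json -- the file the ExecTime tests remove() during cleanup.
void recordAndPersist(const std::vector<const onert::backend::Backend *> &backends,
                      const onert::backend::Backend *b)
{
  onert::exec::ExecTime et(backends);
  et.updateOperationExecTime(b, "op1", /*quantized=*/true, /*op_size=*/100, /*time_us=*/100);
  et.updateOperationExecTime(b, "op1", /*quantized=*/true, /*op_size=*/200, /*time_us=*/200);
  et.storeOperationsExecTime(); // was uploadOperationsExecTime() before 1.12.0
}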
-TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) +TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time) { setExecutor(GetParam()); @@ -392,7 +389,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1); setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1); setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -422,7 +419,7 @@ TEST_P(SchedulerTestWithExecutorParam, straight_graph_known_exec_time) } // Test scheduler behavior for branched graph with known execution time of all nodes and permutes -TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) +TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time) { const int64_t NPU_ET = 5000; setExecutor(GetParam()); @@ -432,7 +429,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) auto graph(createBranchedGraph()); subgs.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); + sub_op_idx(5); // Set default execution and transfer time setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000); @@ -451,7 +448,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET); setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -463,7 +460,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) if (GetParam() == PARALLEL) { branch1_expected_backend = - br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; + br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu"; branch2_expected_backend = branch1_expected_backend == "npu" ? 
"gpu" : "npu"; } @@ -486,7 +483,7 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) * branching or scheduler assigns another backend to a node*/ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -504,11 +501,11 @@ TEST_P(SchedulerTestWithExecutorParam, branched_graph_known_exec_time) // SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times - // one time for each executor -INSTANTIATE_TEST_CASE_P(AllExecutors, SchedulerTestWithExecutorParam, +INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam, testing::Values(LINEAR, DATAFLOW, PARALLEL)); // Test scheduler behavior for branched graph and enabled profiling mode -TEST_F(SchedulerTest, branched_graph_profiling_mode) +TEST_F(HESchedulerTest, branched_graph_profiling_mode) { const int ET = 1e5; @@ -521,7 +518,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) auto graph(createBranchedGraph()); subgs.push(ir::SubgraphIndex{0}, graph); OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4), - sub_op_idx(5); + sub_op_idx(5); // Test 1 // Expected behaviour: scheduler assigns backends to nodes with unknown execution time @@ -537,7 +534,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET); setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1); setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); @@ -560,7 +557,7 @@ TEST_F(SchedulerTest, branched_graph_profiling_mode) setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET); setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1); setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); // Test scheduler auto backend_contexts = buildBackendContexts(*graph); diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc index 806b47ecc..0e742e1e4 100644 --- a/runtime/onert/test/core/exec/ExecInstance.cc +++ b/runtime/onert/test/core/exec/ExecInstance.cc @@ -21,6 +21,7 @@ #include "compiler/Compiler.h" #include "exec/Execution.h" #include "ir/operation/BinaryArithmetic.h" +#include "util/TracingCtx.h" namespace { @@ -51,8 +52,8 @@ public: auto operand_rhs2 = graph->addOperand(shape, type); auto operand_result2 = graph->addOperand(shape, type); graph->operands() - .at(operand_rhs2) - .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16)); + .at(operand_rhs2) + .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16)); // 2nd add operations (result2 <= result1 + rhs2) operation::BinaryArithmetic::Param param1; param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; @@ -60,14 +61,14 @@ public: auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1}; auto output_set1 = OperandIndexSequence{operand_result1}; graph->addOperation( - std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, 
param1)); + std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1)); operation::BinaryArithmetic::Param param2; param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD; param2.activation = Activation::NONE; auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2}; auto output_set2 = OperandIndexSequence{operand_result2}; graph->addOperation( - std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2)); + std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2)); // Identify model inputs and outputs graph->addInput(operand_lhs); graph->addInput(operand_rhs1); @@ -77,13 +78,15 @@ public: // Compile auto subgs = std::make_shared<onert::ir::Subgraphs>(); subgs->push(onert::ir::SubgraphIndex{0}, graph); - onert::compiler::Compiler compiler{subgs}; + tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get()); + onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; executors = compiler.compile(); } public: std::shared_ptr<Graph> graph; std::shared_ptr<onert::exec::ExecutorMap> executors; + std::unique_ptr<onert::util::TracingCtx> tracing_ctx; }; TEST(ExecInstance, simple) @@ -137,7 +140,8 @@ TEST(ExecInstance, twoCompile) // Make new executor: compile again auto subgs = std::make_shared<onert::ir::Subgraphs>(); subgs->push(onert::ir::SubgraphIndex{0}, graph); - onert::compiler::Compiler compiler{subgs}; + auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get()); + onert::compiler::Compiler compiler{subgs, tracing_ctx.get()}; std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile(); onert::exec::Execution execution2{executors2}; @@ -205,7 +209,7 @@ class Inference public: Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4], std::shared_ptr<onert::exec::ExecutorMap> &executors) - : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} + : _input1{input1}, _input2{input2}, _output{output}, _executors{executors} { // DO NOTHING } diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc index 8c2e34df8..6b0c35a79 100644 --- a/runtime/onert/test/core/exec/ExecTime.test.cc +++ b/runtime/onert/test/core/exec/ExecTime.test.cc @@ -62,7 +62,7 @@ TEST(ExecTime, roundtrip_ok) et.updateOperationExecTime(b, "op1", true, 100, 100); et.updateOperationExecTime(b, "op1", true, 200, 200); et.updateOperationExecTime(b, "op1", false, 100, 888); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } { ExecTime et(bs); @@ -73,7 +73,7 @@ TEST(ExecTime, roundtrip_ok) ASSERT_EQ(time, 150); time = et.getOperationExecTime(b, "op1", false, 100); ASSERT_EQ(time, 888); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // clean up EXPECT_EQ(remove("exec_time.json"), 0); @@ -88,7 +88,7 @@ TEST(ExecTime, structure) ExecTime et(bs); et.updateOperationExecTime(b, "op1", true, 100, 100); et.updateOperationExecTime(b, "op1", true, 200, 200); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } { ExecTime et(bs); @@ -97,7 +97,7 @@ TEST(ExecTime, structure) // Check interpolation time = et.getOperationExecTime(b, "op1", true, 200); ASSERT_EQ(time, 200); - et.uploadOperationsExecTime(); + et.storeOperationsExecTime(); } // clean up EXPECT_EQ(remove("exec_time.json"), 0); diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc index 0c7b1b762..327c38f79 100644 --- 
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
index 8c2e34df8..6b0c35a79 100644
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ b/runtime/onert/test/core/exec/ExecTime.test.cc
@@ -62,7 +62,7 @@ TEST(ExecTime, roundtrip_ok)
     et.updateOperationExecTime(b, "op1", true, 100, 100);
     et.updateOperationExecTime(b, "op1", true, 200, 200);
     et.updateOperationExecTime(b, "op1", false, 100, 888);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   {
     ExecTime et(bs);
@@ -73,7 +73,7 @@
     ASSERT_EQ(time, 150);
     time = et.getOperationExecTime(b, "op1", false, 100);
     ASSERT_EQ(time, 888);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   // clean up
   EXPECT_EQ(remove("exec_time.json"), 0);
@@ -88,7 +88,7 @@ TEST(ExecTime, structure)
     ExecTime et(bs);
     et.updateOperationExecTime(b, "op1", true, 100, 100);
     et.updateOperationExecTime(b, "op1", true, 200, 200);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   {
     ExecTime et(bs);
@@ -97,7 +97,7 @@
     // Check interpolation
     time = et.getOperationExecTime(b, "op1", true, 200);
     ASSERT_EQ(time, 200);
-    et.uploadOperationsExecTime();
+    et.storeOperationsExecTime();
   }
   // clean up
   EXPECT_EQ(remove("exec_time.json"), 0);
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 0c7b1b762..327c38f79 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -63,7 +63,7 @@ protected:
     auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
     auto output_set = OperandIndexSequence{operand_result};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+      std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
 
     // Identify model inputs and outputs
 
@@ -79,7 +79,7 @@ protected:
 
     _executors = std::make_shared<ExecutorMap>();
     _executors->insert(
-        std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+      std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
   }
 
   void CreateTwoStepModel()
@@ -109,8 +109,8 @@ protected:
     auto operand_rhs2 = _graph->addOperand(shape, type);
     auto operand_result2 = _graph->addOperand(shape, type);
     _graph->operands()
-        .at(operand_rhs2)
-        .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+      .at(operand_rhs2)
+      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
 
     // 2nd add operations (result2 <= result1 + rhs2)
 
@@ -120,7 +120,7 @@
     auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
     auto output_set1 = OperandIndexSequence{operand_result1};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+      std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
 
     operation::BinaryArithmetic::Param param2;
     param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
@@ -128,7 +128,7 @@
     auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
     auto output_set2 = OperandIndexSequence{operand_result2};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+      std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
 
     // Identify model inputs and outputs
 
@@ -144,7 +144,7 @@
 
     _executors = std::make_shared<ExecutorMap>();
     _executors->insert(
-        std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+      std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
   }
 
   void CreateUnspecifiedDimensionsModel()
@@ -168,9 +168,8 @@
     auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
     _graph->operands()
-        .at(operand_activation)
-        .data(
-            std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+      .at(operand_activation)
+      .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
 
     auto operand_result = _graph->addOperand(shape, type);
 
@@ -182,7 +181,7 @@
     auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
     auto output_set = OperandIndexSequence{operand_result};
     _graph->addOperation(
-        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+      std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
 
     // Identify model inputs and outputs
 
@@ -198,7 +197,7 @@
 
     _executors = std::make_shared<ExecutorMap>();
     _executors->insert(
-        std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
+      std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
   }
 
   void createExecution() { _execution = std::make_unique<Execution>(_executors); }
diff --git a/runtime/onert/test/graph/MockNode.h b/runtime/onert/test/graph/MockNode.h
index 60b4719ed..0e7ed977b 100644
--- a/runtime/onert/test/graph/MockNode.h
+++ b/runtime/onert/test/graph/MockNode.h
@@ -30,7 +30,7 @@ class SimpleMock : public onert::ir::Operation
 public:
   SimpleMock(const onert::ir::OperandIndexSequence &inputs,
              const onert::ir::OperandIndexSequence &outputs)
-      : Operation{onert::ir::OperandConstraint::createAny()}
+    : Operation{onert::ir::OperandConstraint::createAny()}
   {
     setInputs(inputs);
     setOutputs(outputs);
diff --git a/runtime/onert/test/graph/operand/UseDef.cc b/runtime/onert/test/graph/operand/UseDef.cc
index 206e402ed..5ef10027e 100644
--- a/runtime/onert/test/graph/operand/UseDef.cc
+++ b/runtime/onert/test/graph/operand/UseDef.cc
@@ -49,16 +49,16 @@ TEST(ir_Operand, neg_usedef)
   // MockNode1
   auto operand_index1 = graph.addOperand(shape, type);
   auto mocknode_index1 =
-      graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+    graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
 
   // MockNode2
   auto operand_index2 = graph.addOperand(shape, type);
   auto mocknode_index2 =
-      graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+    graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
 
   // MockNode3(two input)
   auto multiinput_index = graph.addOperation(
-      std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+    std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
 
   graph.finishBuilding();
diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index f1cbfd692..2ecaa2885 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -48,7 +48,7 @@ TEST(ShapeInference, Pool2DNodeSame)
   Padding padding{PaddingType::SAME};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
   auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -58,7 +58,7 @@ TEST(ShapeInference, Pool2DNodeSame)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
   operation::Pool2D::Param max_pool_param{
-      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
   infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -75,7 +75,7 @@ TEST(ShapeInference, Pool2DNodeValid)
   Padding padding{PaddingType::VALID};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
   auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -85,7 +85,7 @@ TEST(ShapeInference, Pool2DNodeValid)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
   operation::Pool2D::Param max_pool_param{
-      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
   infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -103,7 +103,7 @@ TEST(ShapeInference, Pool2DNodeExplicit)
   Padding padding{4, 3, 2, 1};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
   auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -113,7 +113,7 @@ TEST(ShapeInference, Pool2DNodeExplicit)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
   operation::Pool2D::Param max_pool_param{
-      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
   infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -130,7 +130,7 @@ TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
   Padding padding{PaddingType::SAME};
 
   operation::Pool2D::Param avg_pool_param{
-      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
 
   ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
                std::runtime_error);
 }
@@ -161,7 +161,7 @@ TEST(ShapeInference, Conv2D)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
 
   param =
-      operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+    operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
   infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -190,7 +190,7 @@ TEST(ShapeInference, DepthwiseConv2D)
   operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
                                           Activation::NONE, Dilation{1, 1}};
   auto infered_out_shape =
-      onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+    onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -364,7 +364,7 @@ TEST(ShapeInference, Transpose)
     ASSERT_EQ(in_shape.rank(), perm.size());
     ASSERT_EQ(expected.rank(), perm.size());
     auto inferred_out_shape =
-        onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+      onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
     // post-conditions
     ASSERT_EQ(inferred_out_shape.rank(), perm.size());
     for (int32_t dim = 0; dim < expected.rank(); dim++)
@@ -479,8 +479,8 @@ TEST(ShapeInference, BCQFullyConnected)
 {
   auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
                    Shape &expected) {
-    auto actual = onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape,
-                                                                      cluster.data());
+    auto actual =
+      onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
     ASSERT_EQ(actual.rank(), expected.rank());
     for (int32_t dim = 0; dim < expected.rank(); dim++)
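
The Pool2D cases above pin down the padding arithmetic that inferPoolShape is expected to implement. Assuming the usual TensorFlow-style SAME/VALID conventions (which the padding type names suggest; the onert internals themselves are not shown in this diff), the H/W extents the tests assert can be reproduced per spatial axis with:

    #include <cstdint>

    // Output extent for one spatial axis under SAME padding: ceil(in / stride).
    int32_t outSame(int32_t in, int32_t stride) { return (in + stride - 1) / stride; }

    // VALID padding: ceil((in - kernel + 1) / stride). The kernel sizes in the
    // tests are the 3 and 6 passed to Pool2D::Param.
    int32_t outValid(int32_t in, int32_t kernel, int32_t stride)
    {
      return (in - kernel + stride) / stride;
    }

    // Explicit padding, as exercised by Padding{4, 3, 2, 1} above (the mapping of
    // those four values to front/back per axis is an assumption here).
    int32_t outExplicit(int32_t in, int32_t kernel, int32_t stride, int32_t pad_front,
                        int32_t pad_back)
    {
      return (in + pad_front + pad_back - kernel) / stride + 1;
    }
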