summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ahub/sam/exclude.txt16
-rw-r--r--.ahub/tcchecker-tca/config.yaml23
-rw-r--r--.github/workflows/check-pr-commit.yml7
-rw-r--r--compiler/arser/include/arser/arser.h87
-rw-r--r--compiler/circle-eval-diff/CMakeLists.txt10
-rw-r--r--compiler/circle-eval-diff/driver/Driver.cpp115
-rw-r--r--compiler/circle-eval-diff/include/CircleEvalDiff.h30
-rw-r--r--compiler/circle-eval-diff/src/CircleEvalDiff.cpp217
-rw-r--r--compiler/circle-eval-diff/src/InputDataLoader.cpp235
-rw-r--r--compiler/circle-eval-diff/src/InputDataLoader.h98
-rw-r--r--compiler/circle-eval-diff/src/InputDataLoader.test.cpp65
-rw-r--r--compiler/circle-eval-diff/src/MetricPrinter.cpp479
-rw-r--r--compiler/circle-eval-diff/src/MetricPrinter.h127
-rw-r--r--compiler/circle-eval-diff/src/MetricPrinter.test.cpp312
-rw-r--r--compiler/circle-eval-diff/src/ModuleEvalDiff.cpp216
-rw-r--r--compiler/circle-eval-diff/src/ModuleEvalDiff.h67
-rw-r--r--compiler/circle-eval-diff/src/Tensor.cpp52
-rw-r--r--compiler/circle-eval-diff/src/Tensor.h3
-rw-r--r--compiler/circle-eval-diff/src/Tensor.test.cpp28
-rw-r--r--compiler/circle-execution-plan/CMakeLists.txt9
-rw-r--r--compiler/circle-execution-plan/src/CircleExecutionPlan.cpp29
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.cpp70
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.h2
-rw-r--r--compiler/circle-inspect/driver/Driver.cpp2
-rw-r--r--compiler/circle-inspect/requires.cmake1
-rw-r--r--compiler/circle-inspect/src/Dump.cpp20
-rw-r--r--compiler/circle-inspect/src/Reader.cpp127
-rw-r--r--compiler/circle-inspect/src/Reader.h87
-rw-r--r--compiler/circle-interpreter/CMakeLists.txt13
-rw-r--r--compiler/circle-interpreter/requires.cmake6
-rw-r--r--compiler/circle-interpreter/src/CircleInterpreter.cpp145
-rw-r--r--compiler/circle-operator-test/CMakeLists.txt18
-rw-r--r--compiler/circle-operator-test/README.md7
-rw-r--r--compiler/circle-operator-test/requires.cmake2
-rw-r--r--compiler/circle-operator-test/src/circle-operator.test.cpp248
-rw-r--r--compiler/circle-operator/CMakeLists.txt17
-rw-r--r--compiler/circle-operator/README.md70
-rw-r--r--compiler/circle-operator/driver/Driver.cpp112
-rw-r--r--compiler/circle-operator/requires.cmake4
-rw-r--r--compiler/circle-operator/src/Dump.cpp85
-rw-r--r--compiler/circle-operator/src/Dump.h45
-rw-r--r--compiler/circle-opselector/driver/Driver.cpp20
-rw-r--r--compiler/circle-part-value-test/CMakeLists.txt3
-rw-r--r--compiler/circle-partitioner-test/CMakeLists.txt3
-rw-r--r--compiler/circle-partitioner/CMakeLists.txt20
-rw-r--r--compiler/circle-partitioner/README.md23
-rw-r--r--compiler/circle-partitioner/requires.cmake1
-rw-r--r--compiler/circle-partitioner/src/CirclePartitioner.cpp83
-rw-r--r--compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt17
-rw-r--r--compiler/circle-quantizer-dredd-recipe-test/test.lst65
-rw-r--r--compiler/circle-quantizer/CMakeLists.txt1
-rw-r--r--compiler/circle-quantizer/requires.cmake1
-rw-r--r--compiler/circle-quantizer/src/CircleQuantizer.cpp96
-rw-r--r--compiler/circle-tensordump/driver/Driver.cpp4
-rw-r--r--compiler/circle-tensordump/src/Dump.cpp7
-rw-r--r--compiler/circle-tensordump/src/Reader.cpp117
-rw-r--r--compiler/circle-tensordump/src/Reader.h85
-rw-r--r--compiler/circle-verify/src/Driver.cpp2
-rw-r--r--compiler/circle2circle-dredd-recipe-test/test.lst3
-rw-r--r--compiler/circle2circle/CMakeLists.txt2
-rw-r--r--compiler/circle2circle/requires.cmake1
-rw-r--r--compiler/circle2circle/src/Circle2Circle.cpp504
-rw-r--r--compiler/circlechef/tools/file/Driver.cpp6
-rw-r--r--compiler/circlechef/tools/reverse/Driver.cpp6
-rw-r--r--compiler/circledump/CMakeLists.txt1
-rw-r--r--compiler/circledump/driver/Driver.cpp16
-rw-r--r--compiler/circledump/include/circleread/Model.h43
-rw-r--r--compiler/circledump/requires.cmake1
-rw-r--r--compiler/circledump/src/Dump.cpp14
-rw-r--r--compiler/circledump/src/Load.cpp133
-rw-r--r--compiler/circledump/src/OpPrinter.cpp6
-rw-r--r--compiler/circledump/src/Read.cpp119
-rw-r--r--compiler/circledump/src/Read.h106
-rw-r--r--compiler/cli/CMakeLists.txt2
-rw-r--r--compiler/coco/core/src/IR/Module.cpp2
-rw-r--r--compiler/coco/generic/src/IR/Data.cpp3
-rw-r--r--compiler/common-artifacts/CMakeLists.txt49
-rw-r--r--compiler/common-artifacts/exclude.lst3
-rw-r--r--compiler/common-artifacts/src/TestDataGenerator.cpp12
-rw-r--r--compiler/crew/CMakeLists.txt3
-rw-r--r--compiler/crew/src/PConfigIni.cpp71
-rw-r--r--compiler/crew/src/PConfigIni.test.cpp61
-rw-r--r--compiler/crew/src/test_read_semicolon.ini2
-rw-r--r--compiler/enco/core/src/CppGen/Host.cpp2
-rw-r--r--compiler/enco/core/src/CppGen/Subnet.cpp4
-rw-r--r--compiler/enco/core/src/Transforms/Split.cpp28
-rw-r--r--compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp2
-rw-r--r--compiler/kuma/src/IntervalSet.h1
-rw-r--r--compiler/loco/include/loco/IR/DataTypeTraits.h9
-rw-r--r--compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp12
-rw-r--r--compiler/luci-eval-driver/src/EvalDriver.cpp17
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst2
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h2
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h2
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h1568
-rw-r--r--compiler/luci-interpreter/pal/linux/KernelsToBuild.lst3
-rw-r--r--compiler/luci-interpreter/pal/linux/PALreference_ops.h22
-rw-r--r--compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst2
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALDequantize.h2
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALQuantize.h2
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALreference_ops.h1556
-rw-r--r--compiler/luci-interpreter/src/core/KernelParams.h5
-rw-r--r--compiler/luci-interpreter/src/kernels/Fill.cpp117
-rw-r--r--compiler/luci-interpreter/src/kernels/Fill.h47
-rw-r--r--compiler/luci-interpreter/src/kernels/Fill.test.cpp169
-rw-r--r--compiler/luci-interpreter/src/kernels/MirrorPad.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Pack.cpp5
-rw-r--r--compiler/luci-interpreter/src/kernels/Pack.test.cpp20
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/PadV2.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/ReduceMax.cpp181
-rw-r--r--compiler/luci-interpreter/src/kernels/ReduceMax.h50
-rw-r--r--compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp103
-rw-r--r--compiler/luci-interpreter/src/kernels/Shape.cpp70
-rw-r--r--compiler/luci-interpreter/src/kernels/Shape.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/Shape.test.cpp89
-rw-r--r--compiler/luci-interpreter/src/kernels/SplitV.cpp28
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.cpp5
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.cpp2
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Add.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Cast.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Div.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Elu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Equal.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Exp.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Fill.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Floor.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Gather.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Greater.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/If.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Less.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Logistic.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Maximum.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Mean.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Minimum.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Mul.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Neg.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/PRelu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pack.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pad.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/PadV2.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pow.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Quantize.cpp5
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp55
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Relu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Relu6.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Reshape.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SVDF.cpp5
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Shape.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Slice.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Softmax.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Split.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SplitV.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Square.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sub.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Tanh.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Transpose.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Unpack.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/While.cpp4
-rw-r--r--compiler/luci-micro/CMakeLists.txt2
-rw-r--r--compiler/luci-micro/luci-interpreter/CMakeLists.txt15
-rw-r--r--compiler/luci-micro/luci-interpreter/README.md158
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h144
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h84
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h36
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h186
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst62
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h124
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h199
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h192
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h114
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h190
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h78
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake65
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst77
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h73
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h67
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h127
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h91
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h31
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h61
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h55
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h39
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h39
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h90
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake82
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst62
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h73
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h85
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h91
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h61
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h258
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h62
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake56
-rw-r--r--compiler/luci-micro/luci-interpreter/requires.cmake1
-rw-r--r--compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp96
-rw-r--r--compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp69
-rw-r--r--compiler/luci-micro/luci-interpreter/src/CMakeLists.txt61
-rw-r--r--compiler/luci-micro/luci-interpreter/src/Interpreter.cpp145
-rw-r--r--compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp51
-rw-r--r--compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt19
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/Kernel.h75
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/KernelParams.h228
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp201
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h71
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h60
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp58
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt15
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp33
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp113
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp220
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Add.h50
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp357
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp139
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp122
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp194
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp283
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp188
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp272
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp104
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp100
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h73
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp143
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Cast.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp241
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp149
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp268
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp456
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h59
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp707
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp80
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp115
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp451
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h57
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp622
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp79
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp149
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp152
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Div.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp230
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Elu.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp81
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Equal.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp306
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp56
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Exp.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp88
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp115
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp117
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Fill.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp169
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp57
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Floor.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp76
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp85
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp147
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp192
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h51
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp260
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp139
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Gather.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp137
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Greater.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp334
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp145
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp333
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/If.cpp94
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/If.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp161
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp121
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp97
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp75
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp126
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp88
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp291
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h53
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp127
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Less.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp334
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp334
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp157
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp92
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp124
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp62
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp101
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp60
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp78
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp104
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp94
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp148
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp150
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp139
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp82
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp346
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mean.h55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp240
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp82
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp172
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp225
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp150
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mul.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp292
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp58
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Neg.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp71
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp306
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp136
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp192
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp211
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h59
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp397
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pack.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp163
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp114
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pad.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp109
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp108
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp79
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pow.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp140
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp160
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp254
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp114
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu.h51
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp168
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp88
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h50
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp149
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp82
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp74
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp255
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp74
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp231
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp81
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp71
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp241
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h56
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp341
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp70
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Shape.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp89
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp153
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Slice.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp70
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp92
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp117
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp103
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp123
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp79
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp81
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Split.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp129
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp111
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp112
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Square.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp64
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp78
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp86
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp74
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp150
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp112
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp164
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sub.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp266
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp93
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp164
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp128
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h296
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp84
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp115
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp351
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp353
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp84
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp148
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp198
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Utils.h293
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/While.cpp116
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/While.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp101
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp344
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp104
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp1376
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp64
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h84
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp53
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp64
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp70
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp67
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp35
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp35
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp61
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp41
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp92
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp41
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp47
-rw-r--r--compiler/luci-micro/standalone/CMakeLists.txt19
-rw-r--r--compiler/luci-pass-value-test/CMakeLists.txt7
-rw-r--r--compiler/luci-pass-value-test/test.lst9
-rw-r--r--compiler/luci-value-test/test.lst2
-rw-r--r--compiler/luci/export/src/CircleBuiltinTypesExtractor.h4
-rw-r--r--compiler/luci/export/src/CircleOps.lst1
-rw-r--r--compiler/luci/export/src/CircleTensorExporter.cpp6
-rw-r--r--compiler/luci/import/CMakeLists.txt1
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes.h1
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h37
-rw-r--r--compiler/luci/import/include/luci/ImporterEx.h39
-rw-r--r--compiler/luci/import/src/GraphBuilderRegistry.cpp2
-rw-r--r--compiler/luci/import/src/ImporterEx.cpp61
-rw-r--r--compiler/luci/import/src/Nodes/CircleConst.cpp4
-rw-r--r--compiler/luci/import/src/Nodes/CircleDensify.cpp43
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.h1
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.lst1
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h40
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConst.cpp1
-rw-r--r--compiler/luci/lang/src/Nodes/CircleDensify.test.cpp76
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp1
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp16
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilders.h5
-rw-r--r--compiler/luci/partition/include/luci/ConnectNode.h219
-rw-r--r--compiler/luci/partition/src/ConnectNode.cpp2
-rw-r--r--compiler/luci/partition/src/ConnectNode.h218
-rw-r--r--compiler/luci/partition/src/ConnectNode.test.h2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAbs.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAbs.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAdd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAdd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAddN.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAddN.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMin.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQGather.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCast.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCast.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCeil.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCeil.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConcatenation.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConst.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConv2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCos.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCos.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustom.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustom.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustomOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDensify.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDensify.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDequantize.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDiv.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDiv.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleElu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleElu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExp.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExp.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExpandDims.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFill.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFill.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloor.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloor.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorMod.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGather.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGather.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGatherNd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreater.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreater.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIf.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIf.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIfOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLess.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLess.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLessEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLog.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLog.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogistic.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaximum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMean.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMean.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMinimum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMul.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMul.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNeg.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNeg.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNotEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOneHot.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePRelu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePack.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePack.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePad.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePad.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePadV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePow.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePow.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleQuantize.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRange.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRange.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRank.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRank.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceAny.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMin.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceProd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu6.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReshape.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReshape.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRound.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRound.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRsqrt.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSVDF.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleScatterNd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelect.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelect.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelectV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleShape.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleShape.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSin.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSin.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSlice.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSlice.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSoftmax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplit.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplit.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitV.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqrt.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquare.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquare.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqueeze.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSub.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSub.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTanh.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTanh.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTile.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTile.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTranspose.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnique.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnique.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpack.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleVariable.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhere.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhere.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhile.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhile.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhileOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleZerosLike.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionIR.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionMerge.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionPGroups.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionPModules.cpp4
-rw-r--r--compiler/luci/pass/CMakeLists.txt8
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h3
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h37
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp39
-rw-r--r--compiler/luci/pass/src/CircleQuantizer.cpp7
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp329
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp525
-rw-r--r--compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp57
-rw-r--r--compiler/luci/pass/src/FoldDensifyPass.cpp180
-rw-r--r--compiler/luci/pass/src/FoldDensifyPass.test.cpp158
-rw-r--r--compiler/luci/pass/src/FoldDequantizePass.cpp96
-rw-r--r--compiler/luci/pass/src/FoldDequantizePass.test.cpp377
-rw-r--r--compiler/luci/pass/src/FoldSparseToDensePass.cpp2
-rw-r--r--compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp49
-rw-r--r--compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp86
-rw-r--r--compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp6
-rw-r--r--compiler/luci/pass/src/FuseAddWithTConvPass.cpp20
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp53
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.cpp186
-rw-r--r--compiler/luci/pass/src/PropagateQParamBackwardPass.cpp1
-rw-r--r--compiler/luci/pass/src/PropagateQParamForwardPass.cpp9
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.cpp126
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.h16
-rw-r--r--compiler/luci/pass/src/QuantizeActivation.cpp11
-rw-r--r--compiler/luci/pass/src/QuantizeBias.cpp14
-rw-r--r--compiler/luci/pass/src/QuantizeBias.test.cpp189
-rw-r--r--compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp7
-rw-r--r--compiler/luci/pass/src/QuantizeWeights.cpp1
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp91
-rw-r--r--compiler/luci/pass/src/QuantizedModelVerifier.test.cpp53
-rw-r--r--compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp80
-rw-r--r--compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp114
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp172
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp123
-rw-r--r--compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp196
-rw-r--r--compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp189
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp172
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp175
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h7
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeType.cpp9
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeType.h1
-rw-r--r--compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp312
-rw-r--r--compiler/luci/pass/src/helpers/SparsityFormatConverter.h129
-rw-r--r--compiler/luci/requires.cmake1
-rw-r--r--compiler/luci/service/src/CircleCloneNode.h1
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.cpp103
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.cpp5
-rw-r--r--compiler/luci/service/src/Nodes/CircleDensify.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleDensify.test.cpp33
-rw-r--r--compiler/luci/service/src/ShapeInfer_StridedSlice.cpp261
-rw-r--r--compiler/luci/tests/test.lst2
-rw-r--r--compiler/mio-circle04/include/mio_circle/Helper.h17
-rw-r--r--compiler/mio-circle04/include/mio_circle/Reader.h101
-rw-r--r--compiler/mio-circle04/src/Reader.cpp147
-rw-r--r--compiler/mio-circle04/src/Reader.test.cpp60
-rw-r--r--compiler/mio-tflite/README.md2
-rw-r--r--compiler/mio-tflite260/README.md2
-rw-r--r--compiler/mir/include/mir/Graph.h4
-rw-r--r--compiler/mir/src/Graph.cpp9
-rw-r--r--compiler/mir2loco/src/mir2loco.test.cpp49
-rw-r--r--compiler/moco/import/src/Importer.cpp2
-rw-r--r--compiler/moco/lang/src/IR/TFNode.cpp1
-rw-r--r--compiler/one-cmds/CMakeLists.txt8
-rw-r--r--compiler/one-cmds/dummy-driver/CMakeLists.txt27
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-infer.cpp34
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp34
-rw-r--r--compiler/one-cmds/dummy-driver/src/help-infer.cpp42
-rw-r--r--compiler/one-cmds/how-to-use-one-commands.txt5
-rw-r--r--compiler/one-cmds/one-build5
-rw-r--r--compiler/one-cmds/one-build.template.cfg1
-rw-r--r--compiler/one-cmds/one-codegen2
-rw-r--r--compiler/one-cmds/one-import-bcq11
-rw-r--r--compiler/one-cmds/one-import-onnx83
-rw-r--r--compiler/one-cmds/one-import-pytorch7
-rw-r--r--compiler/one-cmds/one-import-tf10
-rw-r--r--compiler/one-cmds/one-import-tflite5
-rw-r--r--compiler/one-cmds/one-infer224
-rw-r--r--compiler/one-cmds/one-init280
-rw-r--r--compiler/one-cmds/one-optimize13
-rw-r--r--compiler/one-cmds/one-pack2
-rw-r--r--compiler/one-cmds/one-partition130
-rw-r--r--compiler/one-cmds/one-prepare-venv10
-rw-r--r--compiler/one-cmds/one-profile2
-rw-r--r--compiler/one-cmds/one-quantize211
-rw-r--r--compiler/one-cmds/onecc105
-rw-r--r--compiler/one-cmds/onecc.template.cfg144
-rw-r--r--compiler/one-cmds/onelib/CfgRunner.py99
-rw-r--r--compiler/one-cmds/onelib/OptionBuilder.py95
-rw-r--r--compiler/one-cmds/onelib/TopologicalSortHelper.py45
-rw-r--r--compiler/one-cmds/onelib/WorkflowRunner.py131
-rw-r--r--compiler/one-cmds/onelib/constant.py7
-rw-r--r--compiler/one-cmds/onelib/make_cmd.py5
-rwxr-xr-xcompiler/one-cmds/onnx_legalizer.py59
-rw-r--r--compiler/one-cmds/requires.cmake1
-rw-r--r--compiler/one-cmds/tests/CMakeLists.txt12
-rw-r--r--compiler/one-cmds/tests/OONECC_024.cfg2
-rw-r--r--compiler/one-cmds/tests/one-build_008.cfg1
-rw-r--r--compiler/one-cmds/tests/one-build_009.cfg1
-rw-r--r--compiler/one-cmds/tests/one-import-onnx_002.test71
-rw-r--r--compiler/one-cmds/tests/one-infer-test-post-process.py16
-rw-r--r--compiler/one-cmds/tests/one-infer_001.test42
-rw-r--r--compiler/one-cmds/tests/one-infer_002.test48
-rw-r--r--compiler/one-cmds/tests/one-infer_003.test48
-rw-r--r--compiler/one-cmds/tests/one-infer_004.test38
-rw-r--r--compiler/one-cmds/tests/one-infer_005.cfg3
-rw-r--r--compiler/one-cmds/tests/one-infer_005.test51
-rw-r--r--compiler/one-cmds/tests/one-infer_006.test53
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_001.test39
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_002.test40
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_003.test40
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_004.test41
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_005.test54
-rw-r--r--compiler/one-cmds/tests/one-optimize_001.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_002.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_001.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_002.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_003.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_004.test2
-rw-r--r--compiler/one-cmds/tests/one-partition_001.test46
-rw-r--r--compiler/one-cmds/tests/one-partition_neg_001.test51
-rw-r--r--compiler/one-cmds/tests/one-partition_neg_002.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_010.test65
-rw-r--r--compiler/one-cmds/tests/one-quantize_011.test56
-rw-r--r--compiler/one-cmds/tests/one-quantize_012.qconf.json16
-rw-r--r--compiler/one-cmds/tests/one-quantize_012.test46
-rw-r--r--compiler/one-cmds/tests/one-quantize_013.qconf.json16
-rw-r--r--compiler/one-cmds/tests/one-quantize_013.test48
-rw-r--r--compiler/one-cmds/tests/one-quantize_014.test59
-rw-r--r--compiler/one-cmds/tests/one-quantize_015.test45
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_019.test2
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_020.test48
-rw-r--r--compiler/one-cmds/tests/onecc_008.cfg1
-rw-r--r--compiler/one-cmds/tests/onecc_009.cfg1
-rw-r--r--compiler/one-cmds/tests/onecc_024.cfg22
-rw-r--r--compiler/one-cmds/tests/onecc_024.test77
-rw-r--r--compiler/one-cmds/tests/onecc_025.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_025.test40
-rw-r--r--compiler/one-cmds/tests/onecc_026.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_026.test46
-rw-r--r--compiler/one-cmds/tests/onecc_027.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_027.test46
-rw-r--r--compiler/one-cmds/tests/onecc_028.test42
-rw-r--r--compiler/one-cmds/tests/onecc_028.workflow.json37
-rw-r--r--compiler/one-cmds/tests/onecc_029.test42
-rw-r--r--compiler/one-cmds/tests/onecc_029.workflow.json30
-rw-r--r--compiler/one-cmds/tests/onecc_030.test48
-rw-r--r--compiler/one-cmds/tests/onecc_030.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_031.test48
-rw-r--r--compiler/one-cmds/tests/onecc_031.workflow.json33
-rw-r--r--compiler/one-cmds/tests/onecc_032.test48
-rw-r--r--compiler/one-cmds/tests/onecc_032.workflow.json42
-rw-r--r--compiler/one-cmds/tests/onecc_033.test42
-rw-r--r--compiler/one-cmds/tests/onecc_033.workflow.json42
-rw-r--r--compiler/one-cmds/tests/onecc_034.test48
-rw-r--r--compiler/one-cmds/tests/onecc_034.workflow.json35
-rw-r--r--compiler/one-cmds/tests/onecc_035.test47
-rw-r--r--compiler/one-cmds/tests/onecc_035.workflow.json22
-rw-r--r--compiler/one-cmds/tests/onecc_036.test47
-rw-r--r--compiler/one-cmds/tests/onecc_036.workflow.json18
-rw-r--r--compiler/one-cmds/tests/onecc_037.test42
-rw-r--r--compiler/one-cmds/tests/onecc_037.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_038.test42
-rw-r--r--compiler/one-cmds/tests/onecc_038.workflow.json31
-rw-r--r--compiler/one-cmds/tests/onecc_039.test48
-rw-r--r--compiler/one-cmds/tests/onecc_039.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_040.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_040.test42
-rw-r--r--compiler/one-cmds/tests/onecc_040.workflow.json10
-rw-r--r--compiler/one-cmds/tests/onecc_041.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_041.test58
-rw-r--r--compiler/one-cmds/tests/onecc_041.workflow.json61
-rw-r--r--compiler/one-cmds/tests/onecc_neg_009.test69
-rw-r--r--compiler/one-cmds/tests/onecc_neg_010.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_011.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_neg_011.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_012.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_neg_012.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_013.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_014.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_014.workflow.json3
-rw-r--r--compiler/one-cmds/tests/onecc_neg_015.test42
-rw-r--r--compiler/one-cmds/tests/onecc_neg_015.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_016.test42
-rw-r--r--compiler/one-cmds/tests/onecc_neg_016.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_017.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_017.workflow.json18
-rw-r--r--compiler/one-cmds/tests/onecc_neg_018.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_018.workflow.json24
-rw-r--r--compiler/one-cmds/tests/onecc_neg_019.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_019.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_020.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_020.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_021.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_021.workflow.json44
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.workflow.json63
-rw-r--r--compiler/one-cmds/tests/onecc_neg_023.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_023.workflow.json30
-rw-r--r--compiler/one-cmds/tests/prepare_test_materials.sh14
-rw-r--r--compiler/one-cmds/utils.py59
-rw-r--r--compiler/onnx-tools/CMakeLists.txt6
-rw-r--r--compiler/pota-quantization-value-test/CMakeLists.txt4
-rw-r--r--compiler/record-minmax-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/record-minmax/driver/Driver.cpp39
-rw-r--r--compiler/record-minmax/include/RecordFunction.h2
-rw-r--r--compiler/record-minmax/src/MinMaxObserver.cpp3
-rw-r--r--compiler/record-minmax/src/RecordMinMax.cpp8
-rw-r--r--compiler/souschef/CMakeLists.txt7
-rw-r--r--compiler/souschef/include/souschef/Data/Explicit.h35
-rw-r--r--compiler/souschef/include/souschef/Data/Gaussian.h21
-rw-r--r--compiler/souschef/src/Explicit.cpp21
-rw-r--r--compiler/souschef/src/Gaussian.cpp45
-rw-r--r--compiler/tf2circle-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-dredd-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-model-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-dredd-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-value-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tfliteV2-conversion-test/CMakeLists.txt4
-rwxr-xr-xcompiler/tf2tfliteV2/tf2tfliteV2.py9
-rw-r--r--compiler/tfl-inspect/CMakeLists.txt4
-rw-r--r--compiler/tfl-inspect/driver/Driver.cpp2
-rw-r--r--compiler/tfl-verify/CMakeLists.txt4
-rw-r--r--compiler/tfl-verify/src/Driver.cpp2
-rw-r--r--compiler/tflchef/CMakeLists.txt5
-rw-r--r--compiler/tflchef/core/src/Convert.cpp222
-rw-r--r--compiler/tflchef/core/src/Convert.h49
-rw-r--r--compiler/tflchef/core/src/DataChef.def4
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp167
-rw-r--r--compiler/tflchef/core/src/Op/Densify.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/Densify.h46
-rw-r--r--compiler/tflchef/core/src/OpChef.def1
-rw-r--r--compiler/tflchef/core/src/OpChefs.h1
-rw-r--r--compiler/tflchef/proto/tflchef.proto12
-rw-r--r--compiler/tflchef/tests/make_sparse/test.recipe44
-rw-r--r--compiler/tflchef/tests/make_sparse_f16/test.recipe54
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt1
-rw-r--r--compiler/tflchef/tflite/src/Convert.cpp3
-rw-r--r--compiler/tflchef/tflite/src/FillerHelper.cpp15
-rw-r--r--compiler/tflchef/tflite/src/FillerHelper.h8
-rw-r--r--compiler/tflchef/tflite/src/Op/Add.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/Maximum.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/Minimum.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/Mul.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/PadV2.cpp7
-rw-r--r--compiler/tflchef/tflite/src/Op/ScatterNd.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/SegmentSum.cpp7
-rw-r--r--compiler/tflchef/tflite/src/Op/Sub.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Abs.h (renamed from compiler/tflchef/tflite/src/Op/Abs.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Add.h (renamed from compiler/tflchef/tflite/src/Op/Add.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/AddN.h (renamed from compiler/tflchef/tflite/src/Op/AddN.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ArgMax.h (renamed from compiler/tflchef/tflite/src/Op/ArgMax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ArgMin.h (renamed from compiler/tflchef/tflite/src/Op/ArgMin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/AveragePool2D.h (renamed from compiler/tflchef/tflite/src/Op/AveragePool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BatchMatMul.h (renamed from compiler/tflchef/tflite/src/Op/BatchMatMul.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h (renamed from compiler/tflchef/tflite/src/Op/BatchToSpaceND.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h (renamed from compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Cast.h (renamed from compiler/tflchef/tflite/src/Op/Cast.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Ceil.h (renamed from compiler/tflchef/tflite/src/Op/Ceil.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Concatenation.h (renamed from compiler/tflchef/tflite/src/Op/Concatenation.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Conv2D.h (renamed from compiler/tflchef/tflite/src/Op/Conv2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Cos.h (renamed from compiler/tflchef/tflite/src/Op/Cos.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/DepthToSpace.h (renamed from compiler/tflchef/tflite/src/Op/DepthToSpace.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h (renamed from compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Dequantize.h (renamed from compiler/tflchef/tflite/src/Op/Dequantize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Div.h (renamed from compiler/tflchef/tflite/src/Op/Div.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ELU.h (renamed from compiler/tflchef/tflite/src/Op/ELU.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Equal.h (renamed from compiler/tflchef/tflite/src/Op/Equal.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Exp.h (renamed from compiler/tflchef/tflite/src/Op/Exp.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ExpandDims.h (renamed from compiler/tflchef/tflite/src/Op/ExpandDims.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FakeQuant.h (renamed from compiler/tflchef/tflite/src/Op/FakeQuant.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Fill.h (renamed from compiler/tflchef/tflite/src/Op/Fill.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Floor.h (renamed from compiler/tflchef/tflite/src/Op/Floor.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FloorDiv.h (renamed from compiler/tflchef/tflite/src/Op/FloorDiv.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FloorMod.h (renamed from compiler/tflchef/tflite/src/Op/FloorMod.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FullyConnected.h (renamed from compiler/tflchef/tflite/src/Op/FullyConnected.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Gather.h (renamed from compiler/tflchef/tflite/src/Op/Gather.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/GatherNd.h (renamed from compiler/tflchef/tflite/src/Op/GatherNd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Greater.h (renamed from compiler/tflchef/tflite/src/Op/Greater.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/GreaterEqual.h (renamed from compiler/tflchef/tflite/src/Op/GreaterEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/L2Normalize.h (renamed from compiler/tflchef/tflite/src/Op/L2Normalize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/L2Pool2D.h (renamed from compiler/tflchef/tflite/src/Op/L2Pool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LeakyRelu.h (renamed from compiler/tflchef/tflite/src/Op/LeakyRelu.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Less.h (renamed from compiler/tflchef/tflite/src/Op/Less.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LessEqual.h (renamed from compiler/tflchef/tflite/src/Op/LessEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h (renamed from compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Log.h (renamed from compiler/tflchef/tflite/src/Op/Log.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogSoftmax.h (renamed from compiler/tflchef/tflite/src/Op/LogSoftmax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalAnd.h (renamed from compiler/tflchef/tflite/src/Op/LogicalAnd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalNot.h (renamed from compiler/tflchef/tflite/src/Op/LogicalNot.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalOr.h (renamed from compiler/tflchef/tflite/src/Op/LogicalOr.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Logistic.h (renamed from compiler/tflchef/tflite/src/Op/Logistic.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MatrixDiag.h (renamed from compiler/tflchef/tflite/src/Op/MatrixDiag.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h (renamed from compiler/tflchef/tflite/src/Op/MatrixSetDiag.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MaxPool2D.h (renamed from compiler/tflchef/tflite/src/Op/MaxPool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Maximum.h (renamed from compiler/tflchef/tflite/src/Op/Maximum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Mean.h (renamed from compiler/tflchef/tflite/src/Op/Mean.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Minimum.h (renamed from compiler/tflchef/tflite/src/Op/Minimum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MirrorPad.h (renamed from compiler/tflchef/tflite/src/Op/MirrorPad.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Mul.h (renamed from compiler/tflchef/tflite/src/Op/Mul.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Neg.h (renamed from compiler/tflchef/tflite/src/Op/Neg.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h (renamed from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h (renamed from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NotEqual.h (renamed from compiler/tflchef/tflite/src/Op/NotEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/OneHot.h (renamed from compiler/tflchef/tflite/src/Op/OneHot.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/PRelu.h (renamed from compiler/tflchef/tflite/src/Op/PRelu.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pack.h (renamed from compiler/tflchef/tflite/src/Op/Pack.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pad.h (renamed from compiler/tflchef/tflite/src/Op/Pad.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/PadV2.h (renamed from compiler/tflchef/tflite/src/Op/PadV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pow.h (renamed from compiler/tflchef/tflite/src/Op/Pow.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Quantize.h (renamed from compiler/tflchef/tflite/src/Op/Quantize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Range.h (renamed from compiler/tflchef/tflite/src/Op/Range.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Rank.h (renamed from compiler/tflchef/tflite/src/Op/Rank.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLU.h (renamed from compiler/tflchef/tflite/src/Op/ReLU.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLU6.h (renamed from compiler/tflchef/tflite/src/Op/ReLU6.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h (renamed from compiler/tflchef/tflite/src/Op/ReLUN1To1.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceAny.h (renamed from compiler/tflchef/tflite/src/Op/ReduceAny.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceMax.h (renamed from compiler/tflchef/tflite/src/Op/ReduceMax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceMin.h (renamed from compiler/tflchef/tflite/src/Op/ReduceMin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceProd.h (renamed from compiler/tflchef/tflite/src/Op/ReduceProd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Reshape.h (renamed from compiler/tflchef/tflite/src/Op/Reshape.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h (renamed from compiler/tflchef/tflite/src/Op/ResizeBilinear.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h (renamed from compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReverseSequence.h (renamed from compiler/tflchef/tflite/src/Op/ReverseSequence.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReverseV2.h (renamed from compiler/tflchef/tflite/src/Op/ReverseV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Round.h (renamed from compiler/tflchef/tflite/src/Op/Round.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Rsqrt.h (renamed from compiler/tflchef/tflite/src/Op/Rsqrt.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SVDF.h (renamed from compiler/tflchef/tflite/src/Op/SVDF.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ScatterNd.h (renamed from compiler/tflchef/tflite/src/Op/ScatterNd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SegmentSum.h (renamed from compiler/tflchef/tflite/src/Op/SegmentSum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Select.h (renamed from compiler/tflchef/tflite/src/Op/Select.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SelectV2.h (renamed from compiler/tflchef/tflite/src/Op/SelectV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Shape.h (renamed from compiler/tflchef/tflite/src/Op/Shape.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sin.h (renamed from compiler/tflchef/tflite/src/Op/Sin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Slice.h (renamed from compiler/tflchef/tflite/src/Op/Slice.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Softmax.h (renamed from compiler/tflchef/tflite/src/Op/Softmax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h (renamed from compiler/tflchef/tflite/src/Op/SpaceToBatchND.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h (renamed from compiler/tflchef/tflite/src/Op/SpaceToDepth.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SparseToDense.h (renamed from compiler/tflchef/tflite/src/Op/SparseToDense.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Split.h (renamed from compiler/tflchef/tflite/src/Op/Split.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SplitV.h (renamed from compiler/tflchef/tflite/src/Op/SplitV.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sqrt.h (renamed from compiler/tflchef/tflite/src/Op/Sqrt.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Square.h (renamed from compiler/tflchef/tflite/src/Op/Square.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SquaredDifference.h (renamed from compiler/tflchef/tflite/src/Op/SquaredDifference.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Squeeze.h (renamed from compiler/tflchef/tflite/src/Op/Squeeze.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/StridedSlice.h (renamed from compiler/tflchef/tflite/src/Op/StridedSlice.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sub.h (renamed from compiler/tflchef/tflite/src/Op/Sub.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sum.h (renamed from compiler/tflchef/tflite/src/Op/Sum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Tanh.h (renamed from compiler/tflchef/tflite/src/Op/Tanh.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Tile.h (renamed from compiler/tflchef/tflite/src/Op/Tile.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/TopKV2.h (renamed from compiler/tflchef/tflite/src/Op/TopKV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Transpose.h (renamed from compiler/tflchef/tflite/src/Op/Transpose.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/TransposeConv.h (renamed from compiler/tflchef/tflite/src/Op/TransposeConv.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h (renamed from compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Unique.h (renamed from compiler/tflchef/tflite/src/Op/Unique.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Unpack.h (renamed from compiler/tflchef/tflite/src/Op/Unpack.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Where.h (renamed from compiler/tflchef/tflite/src/Op/Where.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ZerosLike.h (renamed from compiler/tflchef/tflite/src/Op/ZerosLike.h)0
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChefs.h220
-rw-r--r--compiler/tflchef/tools/file/Driver.cpp6
-rw-r--r--compiler/tflchef/tools/reverse/Driver.cpp6
-rw-r--r--compiler/tfldump/CMakeLists.txt1
-rw-r--r--compiler/tfldump/driver/Driver.cpp15
-rw-r--r--compiler/tfldump/include/tflread/Model.h43
-rw-r--r--compiler/tfldump/requires.cmake1
-rw-r--r--compiler/tfldump/src/Dump.cpp6
-rw-r--r--compiler/tfldump/src/Load.cpp133
-rw-r--r--compiler/tfldump/src/OpPrinter.cpp1
-rw-r--r--compiler/tflite2circle-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/tflite2circle/driver/Driver.cpp23
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions.h2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp30
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp9
-rw-r--r--compiler/tflite2circle/src/TFLBuiltinOptions.lst3
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--compiler/vconone/src/version.cpp2
-rw-r--r--compute/ARMComputeEx/CMakeLists.txt2
-rw-r--r--compute/cker/CMakeLists.txt17
-rw-r--r--compute/cker/include/cker/CpuBackendThreadpool.h7
-rw-r--r--compute/cker/include/cker/NeonTensorUtils.h2
-rw-r--r--compute/cker/include/cker/operation/Conv.h30
-rw-r--r--compute/cker/include/cker/operation/DepthwiseConv.h1
-rw-r--r--compute/cker/include/cker/operation/reference/Conv.h59
-rw-r--r--compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h152
-rw-r--r--compute/cker/include/cker/ruy/RuySupport.h34
-rw-r--r--compute/cker/src/Range.test.cc (renamed from compute/test/cker/Range.cc)0
-rw-r--r--compute/ruy/include/ruy/RuySupport.h43
-rw-r--r--compute/ruy/include/ruy/operation/Conv.h2
-rw-r--r--compute/ruy/include/ruy/operation/FullyConnected.h2
-rw-r--r--compute/test/CMakeLists.txt17
-rw-r--r--docs/conf.py2
-rw-r--r--docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md18
-rw-r--r--docs/release/1.20/index.rst13
-rw-r--r--docs/release/1.20/release-note-1.20.0.md34
-rw-r--r--docs/release/1.21/index.rst13
-rw-r--r--docs/release/1.21/release-note_1.21.0.md35
-rw-r--r--infra/cmake/modules/IdentifyPlatform.cmake4
-rw-r--r--infra/cmake/packages/AbseilConfig.cmake14
-rw-r--r--infra/cmake/packages/AbseilSourceConfig.cmake7
-rw-r--r--infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/CaffeSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/CpuInfoSourceConfig.cmake4
-rw-r--r--infra/cmake/packages/Egl_HeadersSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/FarmhashSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/Fp16SourceConfig.cmake2
-rw-r--r--infra/cmake/packages/GEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/GFlagsSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/GTestSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/HDF5SourceConfig.cmake3
-rw-r--r--infra/cmake/packages/JsoncppSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/NEON2SSESourceConfig.cmake8
-rw-r--r--infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/OouraFFTSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/Opengl_HeadersSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/ProtobufSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/Pybind11SourceConfig.cmake3
-rw-r--r--infra/cmake/packages/PytorchSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGpuSourceConfig.cmake5
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake21
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/VulkanSourceConfig.cmake20
-rw-r--r--infra/command/format6
-rw-r--r--infra/command/gen-coverage-report4
-rw-r--r--infra/debian/compiler/changelog47
-rw-r--r--infra/debian/compiler/docs/one-infer.146
-rw-r--r--infra/debian/compiler/docs/one-partition.156
-rw-r--r--infra/debian/compiler/one-compiler.install9
-rw-r--r--infra/debian/compiler/one-compiler.manpages2
-rw-r--r--infra/debian/runtime/changelog15
-rwxr-xr-xinfra/debian/runtime/rules2
-rw-r--r--infra/docker/bionic/Dockerfile2
-rw-r--r--infra/docker/focal/Dockerfile2
-rw-r--r--infra/nncc/CMakeLists.txt5
-rw-r--r--infra/nncc/cmake/options/options_armv7em-generic.cmake3
-rw-r--r--infra/nnfw/CMakeLists.txt6
-rw-r--r--infra/nnfw/cmake/ApplyCompileFlags.cmake10
-rw-r--r--infra/nnfw/cmake/CfgOptionFlags.cmake5
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake3
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_linux.cmake13
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake3
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake6
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake6
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake6
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake10
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-android.cmake2
-rw-r--r--infra/nnfw/cmake/options/options_armv7l-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/options/options_x86_64-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/packages/ARMComputeConfig.cmake8
-rw-r--r--infra/nnfw/cmake/packages/CpuInfoConfig.cmake16
-rw-r--r--infra/nnfw/cmake/packages/GLib2.0Config.cmake41
-rw-r--r--infra/nnfw/cmake/packages/Ruy/CMakeLists.txt5
-rw-r--r--infra/nnfw/cmake/packages/RuyConfig.cmake17
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt6
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt96
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake44
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake9
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt121
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake50
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake9
-rw-r--r--infra/nnfw/config/gbs.conf11
-rw-r--r--infra/packaging/preset/2022032313
-rw-r--r--infra/packaging/preset/20220323_windows14
-rw-r--r--infra/packaging/res/tf2nnpkg.202203232
-rw-r--r--infra/scripts/compiler_modules.sh10
-rwxr-xr-xinfra/scripts/docker_build_nncc.sh4
-rwxr-xr-xinfra/scripts/docker_build_test_x64.sh4
-rwxr-xr-xinfra/scripts/docker_collect_nnpkg_resources.sh6
-rwxr-xr-xinfra/scripts/test_ubuntu_runtime_mixed.sh4
-rwxr-xr-xinfra/scripts/unittest_compiler_xml.sh11
-rw-r--r--nnpackage/examples/README.md7
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/README.md28
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST11
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5bin0 -> 1614584 bytes
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5bin0 -> 611064 bytes
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflitebin0 -> 4276 bytes
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflitebin0 -> 2024 bytes
-rw-r--r--nnpackage/schema/circle_schema.fbs173
-rw-r--r--packaging/ABSEIL.tar.gzbin1702946 -> 1909045 bytes
-rw-r--r--packaging/CPUINFO.tar.gzbin3476406 -> 136288 bytes
-rw-r--r--packaging/FP16.tar.gzbin71362 -> 70160 bytes
-rw-r--r--packaging/RUY.tar.gzbin235110 -> 0 bytes
-rw-r--r--packaging/TENSORFLOW-2.8.0-RUY.tar.gzbin0 -> 290633 bytes
-rw-r--r--packaging/nnfw.spec119
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json11
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.recipe43
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.rule13
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json11
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.recipe43
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_004/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Densify_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe41
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe26
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe26
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe135
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe149
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule10
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe49
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe49
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe46
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse0
-rw-r--r--res/TensorFlowPythonExamples/examples/AddV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/PadV2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/abs/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/add/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/add_n/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/argmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/argmin/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/biasadd/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cast/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/ceil/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/concat/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cond/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cond_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/cos/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/div/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/elu/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/exp/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/fill/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/flatten/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floor/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floordiv/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/floormod/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/gather/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/gather_nd/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater_equal/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/gru/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/instance_norm/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/leaky_relu/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less_equal/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log_softmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_and/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_not/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_or/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/lstm/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/matmul/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/maximum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/minimum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/multiply/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/negative/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/not_equal/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/one_hot/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pack/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pad/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pow/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/prelu/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/range/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/rank/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_all/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_any/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_max/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_min/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu6/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reshape/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/reverse_v2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rnn/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/round/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rsqrt/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/segment_sum/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/shape/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sigmoid/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sin/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/slice/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/softmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/split/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/split_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sqrt/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/square/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squared_difference/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squeeze_1/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squeeze_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/strided_slice/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/subtract/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tanh/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tile/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/top_k/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unique/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unstack/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_v2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while_3/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/zeros_like/__init__.py2
-rw-r--r--runtime/contrib/android/api/build.gradle2
-rw-r--r--runtime/libs/misc/CMakeLists.txt19
-rw-r--r--runtime/libs/misc/examples/tensor_index_iterator.cpp74
-rw-r--r--runtime/libs/misc/include/misc/EnvConfigSource.h41
-rw-r--r--runtime/libs/misc/include/misc/GeneralConfigSource.h44
-rw-r--r--runtime/libs/misc/include/misc/IConfigSource.h46
-rw-r--r--runtime/libs/misc/include/misc/string_helpers.h2
-rw-r--r--runtime/libs/misc/src/EnvConfigSource.cpp40
-rw-r--r--runtime/libs/misc/src/GeneralConfigSource.cpp40
-rw-r--r--runtime/libs/misc/src/string_helpers.test.cpp81
-rw-r--r--runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp59
-rw-r--r--runtime/libs/misc/src/tensor/IndexIterator.test.cpp61
-rw-r--r--runtime/libs/ndarray/CMakeLists.txt11
-rw-r--r--runtime/libs/ndarray/include/ndarray/Array.h24
-rw-r--r--runtime/libs/ndarray/src/Array.test.cpp452
-rw-r--r--runtime/libs/ndarray/src/ContiguousSpan.test.cpp198
-rw-r--r--runtime/libs/ndarray/src/detail/cxx14.h67
-rw-r--r--runtime/libs/ndarray/test/CMakeLists.txt18
-rw-r--r--runtime/libs/ndarray/test/ndarray_test.cpp122
-rw-r--r--runtime/onert/CMakeLists.txt6
-rw-r--r--runtime/onert/api/CMakeLists.txt1
-rw-r--r--runtime/onert/api/include/nnfw.h4
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/api/src/nnfw_api.cc10
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.cc267
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.h29
-rw-r--r--runtime/onert/backend/acl_cl/Backend.h4
-rw-r--r--runtime/onert/backend/acl_neon/Backend.h4
-rw-r--r--runtime/onert/backend/cpu/CMakeLists.txt2
-rw-r--r--runtime/onert/backend/cpu/ExternalContext.h2
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.cc10
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.cc58
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.h5
-rw-r--r--runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc64
-rw-r--r--runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h8
-rw-r--r--runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc4
-rw-r--r--runtime/onert/backend/ruy/ExternalContext.h2
-rw-r--r--runtime/onert/backend/ruy/KernelGenerator.cc10
-rw-r--r--runtime/onert/backend/trix/CMakeLists.txt2
-rw-r--r--runtime/onert/backend/trix/DevContext.h42
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.cc105
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.h2
-rw-r--r--runtime/onert/backend/xnnpack/KernelGenerator.cc10
-rw-r--r--runtime/onert/core/CMakeLists.txt14
-rw-r--r--runtime/onert/core/include/backend/ITensor.h1
-rw-r--r--runtime/onert/core/include/backend/basic/BackendContextHelpers.h4
-rw-r--r--runtime/onert/core/include/compiler/BackendManager.h13
-rw-r--r--runtime/onert/core/include/compiler/Compiler.h74
-rw-r--r--runtime/onert/core/include/compiler/LoweredGraph.h7
-rw-r--r--runtime/onert/core/include/compiler/StaticShapeInferer.h78
-rw-r--r--runtime/onert/core/include/exec/Execution.h6
-rw-r--r--runtime/onert/core/include/exec/Executors.h71
-rw-r--r--runtime/onert/core/include/exec/FunctionSequence.h3
-rw-r--r--runtime/onert/core/include/exec/IExecutor.h2
-rw-r--r--runtime/onert/core/include/ir/Graph.h29
-rw-r--r--runtime/onert/core/include/ir/Index.h10
-rw-r--r--runtime/onert/core/include/ir/Layout.h1
-rw-r--r--runtime/onert/core/include/ir/Model.h139
-rw-r--r--runtime/onert/core/include/ir/NNPkg.h193
-rw-r--r--runtime/onert/core/include/ir/Subgraphs.h139
-rw-r--r--runtime/onert/core/include/ir/TypeInfo.h6
-rw-r--r--runtime/onert/core/include/ir/operation/Bulk.h2
-rw-r--r--runtime/onert/core/include/util/CalculateActivationRange.h2
-rw-r--r--runtime/onert/core/include/util/Config.lst2
-rw-r--r--runtime/onert/core/include/util/ConfigSource.h10
-rw-r--r--runtime/onert/core/include/util/EnvConfigSource.h41
-rw-r--r--runtime/onert/core/include/util/GeneralConfigSource.h44
-rw-r--r--runtime/onert/core/include/util/IConfigSource.h46
-rw-r--r--runtime/onert/core/include/util/ObjectManager.h13
-rw-r--r--runtime/onert/core/include/util/TracingCtx.h26
-rw-r--r--runtime/onert/core/src/backend/builtin/ExternalContext.h2
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.cc32
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.h17
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc16
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.h7
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc4
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h6
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc19
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h6
-rw-r--r--runtime/onert/core/src/compiler/BackendManager.cc15
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc505
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc85
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h26
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc10
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.cc11
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.h18
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.test.cc572
-rw-r--r--runtime/onert/core/src/compiler/Linear.cc10
-rw-r--r--runtime/onert/core/src/compiler/LoweredGraph.cc44
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.cc667
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.h8
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInferer.cc648
-rw-r--r--runtime/onert/core/src/compiler/TensorRegistries.h13
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc1
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc18
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc47
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.cc222
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.h25
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.h17
-rw-r--r--runtime/onert/core/src/exec/ExecTime.cc6
-rw-r--r--runtime/onert/core/src/exec/ExecTime.test.cc106
-rw-r--r--runtime/onert/core/src/exec/Execution.cc24
-rw-r--r--runtime/onert/core/src/exec/Execution.test.cc302
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.h5
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.cc14
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.h13
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.cc5
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.h15
-rw-r--r--runtime/onert/core/src/exec/Executors.cc183
-rw-r--r--runtime/onert/core/src/exec/FunctionSequence.cc4
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.cc4
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.h5
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.h14
-rw-r--r--runtime/onert/core/src/exec/feature/MockTensor.h66
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc86
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.h2
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.test.cc86
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.cc7
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.h7
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.test.cc355
-rw-r--r--runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc10
-rw-r--r--runtime/onert/core/src/interp/operations/Concat.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/Conv2D.cc10
-rw-r--r--runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc10
-rw-r--r--runtime/onert/core/src/interp/operations/ElementwiseActivations.cc9
-rw-r--r--runtime/onert/core/src/interp/operations/FullyConnected.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/Gather.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/InstanceNorm.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/Pad.cc6
-rw-r--r--runtime/onert/core/src/interp/operations/Pool2D.cc12
-rw-r--r--runtime/onert/core/src/interp/operations/Reshape.cc2
-rw-r--r--runtime/onert/core/src/interp/operations/Softmax.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/TransposeConv.cc8
-rw-r--r--runtime/onert/core/src/ir/Graph.cc14
-rw-r--r--runtime/onert/core/src/ir/Graph.test.cc147
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.test.cc67
-rw-r--r--runtime/onert/core/src/ir/MockNode.h (renamed from runtime/onert/test/core/ir/MockNode.h)0
-rw-r--r--runtime/onert/core/src/ir/Operand.test.cc86
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.test.cc52
-rw-r--r--runtime/onert/core/src/ir/Operands.test.cc45
-rw-r--r--runtime/onert/core/src/ir/Operation.test.cc98
-rw-r--r--runtime/onert/core/src/ir/Operations.test.cc42
-rw-r--r--runtime/onert/core/src/ir/Shape.test.cc58
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.test.cc93
-rw-r--r--runtime/onert/core/src/util/ChromeTracingEventWriter.cc6
-rw-r--r--runtime/onert/core/src/util/ConfigSource.cc25
-rw-r--r--runtime/onert/core/src/util/EnvConfigSource.cc40
-rw-r--r--runtime/onert/core/src/util/EventCollector.cc2
-rw-r--r--runtime/onert/core/src/util/EventCollector.h7
-rw-r--r--runtime/onert/core/src/util/EventRecorder.cc2
-rw-r--r--runtime/onert/core/src/util/EventWriter.cc2
-rw-r--r--runtime/onert/core/src/util/GeneralConfigSource.cc45
-rw-r--r--runtime/onert/core/src/util/Index.test.cc34
-rw-r--r--runtime/onert/core/src/util/MDTableEventWriter.cc10
-rw-r--r--runtime/onert/core/src/util/ObjectManager.test.cc211
-rw-r--r--runtime/onert/core/src/util/SNPEEventWriter.cc5
-rw-r--r--runtime/onert/core/src/util/ShapeInference.test.cc544
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h36
-rw-r--r--runtime/onert/frontend/circle/include/circle_loader.h4
-rw-r--r--runtime/onert/frontend/circle/src/circle_loader.cc16
-rw-r--r--runtime/onert/frontend/nnapi/execution.cc2
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc9
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h21
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h2
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc8
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h4
-rw-r--r--runtime/onert/frontend/tflite/include/tflite_loader.h2
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_loader.cc8
-rw-r--r--runtime/onert/frontend/trix/CMakeLists.txt2
-rw-r--r--runtime/onert/frontend/trix/include/trix_loader.h2
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader.cc32
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader_dummy.cc6
-rw-r--r--runtime/onert/test/CMakeLists.txt15
-rw-r--r--runtime/onert/test/core/compiler/HEScheduler.cc573
-rw-r--r--runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc45
-rw-r--r--runtime/onert/test/core/exec/ExecInstance.cc301
-rw-r--r--runtime/onert/test/core/exec/ExecTime.test.cc103
-rw-r--r--runtime/onert/test/core/interp/ExecManager.cc360
-rw-r--r--runtime/onert/test/core/ir/Graph.cc148
-rw-r--r--runtime/onert/test/core/ir/LayoutSet.cc67
-rw-r--r--runtime/onert/test/core/ir/OperandIndexSet.cc52
-rw-r--r--runtime/onert/test/core/ir/OperandSet.cc45
-rw-r--r--runtime/onert/test/core/ir/OperationSet.cc41
-rw-r--r--runtime/onert/test/core/ir/SetIO.cc99
-rw-r--r--runtime/onert/test/core/ir/Shape.cc58
-rw-r--r--runtime/onert/test/core/ir/UseDef.cc85
-rw-r--r--runtime/onert/test/core/ir/Verifier.cc92
-rw-r--r--runtime/onert/test/core/util/Index.cc34
-rw-r--r--runtime/onert/test/core/util/ObjectManager.cc211
-rw-r--r--runtime/onert/test/core/util/ShapeInference.cc545
-rw-r--r--runtime/service/CMakeLists.txt1
-rw-r--r--runtime/service/npud/CMakeLists.txt21
-rw-r--r--runtime/service/npud/core/Server.cc65
-rw-r--r--runtime/service/npud/core/Server.h55
-rw-r--r--runtime/service/npud/core/Signal.cc56
-rw-r--r--runtime/service/npud/core/Signal.h37
-rw-r--r--runtime/service/npud/core/main.cc40
-rw-r--r--runtime/service/npud/util/Config.lst22
-rw-r--r--runtime/service/npud/util/ConfigSource.cc126
-rw-r--r--runtime/service/npud/util/ConfigSource.h51
-rw-r--r--runtime/service/npud/util/Logging.h88
-rw-r--r--tests/nnapi/CMakeLists.txt5
-rw-r--r--tests/nnfw_api/src/CircleGen.cc28
-rw-r--r--tests/nnfw_api/src/CircleGen.h4
-rw-r--r--tests/nnfw_api/src/GenModelTest.h23
-rw-r--r--tests/nnfw_api/src/GenModelTests.test.cc (renamed from tests/nnfw_api/src/GenModelTests.cc)0
-rw-r--r--tests/nnfw_api/src/ModelTestDynamicTensor.test.cc (renamed from tests/nnfw_api/src/ModelTestDynamicTensor.cc)0
-rw-r--r--tests/nnfw_api/src/ModelTestInputReshaping.test.cc (renamed from tests/nnfw_api/src/ModelTestInputReshaping.cc)0
-rw-r--r--tests/nnfw_api/src/RegressionTests.test.cc (renamed from tests/nnfw_api/src/RegressionTests.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc (renamed from tests/nnfw_api/src/ValidationTestAddModelLoaded.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc (renamed from tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc (renamed from tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc (renamed from tests/nnfw_api/src/ValidationTestMultipleSessions.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestPipelineSession.test.cc (renamed from tests/nnfw_api/src/ValidationTestPipelineSession.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestSessionCreated.test.cc (renamed from tests/nnfw_api/src/ValidationTestSessionCreated.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestSingleSession.test.cc (renamed from tests/nnfw_api/src/ValidationTestSingleSession.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.cc301
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.test.cc301
-rw-r--r--tests/nnfw_api/src/one_op_tests/AddN.test.cc (renamed from tests/nnfw_api/src/one_op_tests/AddN.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/ArgMinMax.cc256
-rw-r--r--tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc256
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.cc243
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc243
-rw-r--r--tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc (renamed from tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cast.cc173
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cast.test.cc201
-rw-r--r--tests/nnfw_api/src/one_op_tests/Concat.cc244
-rw-r--r--tests/nnfw_api/src/one_op_tests/Concat.test.cc244
-rw-r--r--tests/nnfw_api/src/one_op_tests/Conv2D.cc248
-rw-r--r--tests/nnfw_api/src/one_op_tests/Conv2D.test.cc278
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cos.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Cos.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthToSpace.cc89
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc89
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc457
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc502
-rw-r--r--tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc (renamed from tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Elu.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Elu.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Equal.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Equal.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc (renamed from tests/nnfw_api/src/one_op_tests/ExpandDims.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Fill.cc148
-rw-r--r--tests/nnfw_api/src/one_op_tests/Fill.test.cc148
-rw-r--r--tests/nnfw_api/src/one_op_tests/Floor.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Floor.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc (renamed from tests/nnfw_api/src/one_op_tests/FloorDiv.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc (renamed from tests/nnfw_api/src/one_op_tests/FullyConnected.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Greater.test.cc144
-rw-r--r--tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc145
-rw-r--r--tests/nnfw_api/src/one_op_tests/If.cc132
-rw-r--r--tests/nnfw_api/src/one_op_tests/If.test.cc132
-rw-r--r--tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc (renamed from tests/nnfw_api/src/one_op_tests/InstanceNorm.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc (renamed from tests/nnfw_api/src/one_op_tests/L2Normalization.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc (renamed from tests/nnfw_api/src/one_op_tests/LeakyRelu.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Less.test.cc143
-rw-r--r--tests/nnfw_api/src/one_op_tests/LessEqual.test.cc144
-rw-r--r--tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc (renamed from tests/nnfw_api/src/one_op_tests/LogSoftmax.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Mean.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Mean.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Mul.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Mul.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Neg.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Neg.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/NotEqual.test.cc158
-rw-r--r--tests/nnfw_api/src/one_op_tests/OneHot.test.cc (renamed from tests/nnfw_api/src/one_op_tests/OneHot.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.cc172
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.test.cc172
-rw-r--r--tests/nnfw_api/src/one_op_tests/PadV2.test.cc (renamed from tests/nnfw_api/src/one_op_tests/PadV2.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Quantize.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Quantize.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Rank.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Rank.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reduce.cc70
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reduce.test.cc70
-rw-r--r--tests/nnfw_api/src/one_op_tests/Relu.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Relu.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Relu6.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Relu6.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc101
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc101
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc (renamed from tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reverse.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Reverse.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Select.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Select.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Shape.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Shape.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Slice.cc187
-rw-r--r--tests/nnfw_api/src/one_op_tests/Slice.test.cc187
-rw-r--r--tests/nnfw_api/src/one_op_tests/Softmax.cc130
-rw-r--r--tests/nnfw_api/src/one_op_tests/Softmax.test.cc130
-rw-r--r--tests/nnfw_api/src/one_op_tests/Split.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Split.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Sqrt.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Sqrt.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Square.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Square.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc (renamed from tests/nnfw_api/src/one_op_tests/StridedSlice.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Sub.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Sub.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Tile.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Tile.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Transpose.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Transpose.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.cc270
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.test.cc270
-rw-r--r--tests/scripts/command/nnpkg-test11
-rw-r--r--tests/scripts/command/prepare-model12
-rw-r--r--tests/tools/nnpackage_run/src/nnpackage_run.cc6
-rw-r--r--tests/tools/nnpackage_run/src/rawformatter.cc26
-rw-r--r--tests/tools/tflite_vanilla_run/CMakeLists.txt11
-rw-r--r--tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc7
-rw-r--r--tools/cross/arm/sources.list.jammy11
-rw-r--r--tools/cross/arm/sources.list.xenial11
-rwxr-xr-xtools/cross/install_rootfs.sh11
-rwxr-xr-xtools/nnpackage_tool/gen_golden/gen_golden.py4
-rwxr-xr-xtools/nnpackage_tool/model2nnpkg/model2nnpkg.sh137
2095 files changed, 77633 insertions, 12831 deletions
diff --git a/.ahub/sam/exclude.txt b/.ahub/sam/exclude.txt
index c9ba5e084..f16f84f3c 100644
--- a/.ahub/sam/exclude.txt
+++ b/.ahub/sam/exclude.txt
@@ -5,6 +5,22 @@
# Eigen
/ONE/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
+# Frontend test tools that are needed for release package build
+/ONE/compiler/circlechef
+/ONE/compiler/circle-verify
+/ONE/compiler/luci/tester
+
+# Exclude IR headers which have lots of similar patterns
+# TODO remove this when refactoring is possible
+/ONE/compiler/luci/lang/include/luci/IR/Nodes
+/ONE/compiler/luci/import/include/luci/Import/Nodes
+/ONE/compiler/loco/include/loco/IR
+/ONE/compiler/tflchef/tflite/src/Op/include
+
+# Exclude interpreter kernels which have similar patterns
+/ONE/compiler/luci-interpreter/src/kernels
+/ONE/compiler/locomotiv/src/Node
+
# Test codes
/ONE/tests
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index 95e11d0f9..73ec5489f 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -4,30 +4,23 @@ test:
testCaseLanguage: CPP
testFW: GTEST
testCaseFolder:
- - /compute/test/cker
- - /runtime/onert/core/src/backend/basic
- - /runtime/onert/frontend/nnapi
- - /runtime/onert/test/core/compiler
- - /runtime/onert/test/core/exec
- - /runtime/onert/test/core/interp
- - /runtime/onert/test/graph
- - /runtime/onert/test/graph/operand
- - /runtime/onert/test/graph/operation
- - /runtime/onert/test/graph/verifier
- - /runtime/onert/test/ir
- - /runtime/onert/test/util
- - /tests/nnfw_api/src
+ - /compute/cker
+ - /runtime/libs/misc
+ - /runtime/libs/ndarray
+ - /runtime/onert
+ - /tests/nnfw_api
testFile:
- - extension: cpp
+ - extension: test.cpp
any: true
- - extension: cc
+ - extension: test.cc
any: true
testCase:
- condition:
- functionName:
starts:
- TEST
+ - TYPED_TEST
- excludes :
- Verifier.dag_checker
- graph_operand_LayoutSet.layout_set_operators
diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml
index 38c76dc18..a3f4c1c92 100644
--- a/.github/workflows/check-pr-commit.yml
+++ b/.github/workflows/check-pr-commit.yml
@@ -5,6 +5,11 @@ on:
branches:
- master
- release/*
+ types:
+ - opened
+ - synchronize
+ - reopened
+ - ready_for_review
defaults:
run:
@@ -14,6 +19,8 @@ jobs:
check-commit-message:
name: Check commit message
runs-on: ubuntu-20.04
+ # Skip on draft, check on draft -> ready
+ if: github.event.pull_request.draft == false
steps:
- name: Checkout
diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h
index 1703e421e..43f99dc5e 100644
--- a/compiler/arser/include/arser/arser.h
+++ b/compiler/arser/include/arser/arser.h
@@ -303,7 +303,7 @@ private:
std::string _long_name;
std::string _short_name;
std::vector<std::string> _names;
- std::string _type;
+ std::string _type = "string";
std::string _help_message;
std::function<void(void)> _func;
uint32_t _nargs{1};
@@ -540,16 +540,20 @@ public:
/*
** print usage
*/
+ auto print_usage_arg = [&](const arser::Argument &arg) {
+ stream << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ };
stream << "Usage: ./" << parser._program_name << " ";
// required optional argument
for (const auto &arg : parser._optional_arg_vec)
{
if (!arg._is_required)
continue;
- stream << arg._short_name << " ";
- std::string arg_name = arser::internal::remove_dash(arg._long_name);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ stream << arg._short_name;
+ print_usage_arg(arg);
stream << " ";
}
// rest of the optional argument
@@ -560,10 +564,7 @@ public:
stream << "[" << arg._short_name;
if (arg._nargs)
{
- stream << " ";
- std::string arg_name = arser::internal::remove_dash(arg._long_name);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ print_usage_arg(arg);
}
stream << "]"
<< " ";
@@ -591,39 +592,28 @@ public:
}
const size_t message_width = 60;
- // positional argument
- if (!parser._positional_arg_vec.empty())
- {
- stream << "[Positional argument]" << std::endl;
- for (const auto &arg : parser._positional_arg_vec)
+ auto print_help_args = [&](const std::list<Argument> &args, const std::string &title) {
+ if (!args.empty())
{
- stream.width(length_of_longest_arg);
- stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ stream << title << std::endl;
+ for (const auto &arg : args)
{
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
}
+ std::cout << std::endl;
}
- std::cout << std::endl;
- }
+ };
+ // positional argument
+ print_help_args(parser._positional_arg_vec, "[Positional argument]");
// optional argument
- if (!parser._optional_arg_vec.empty())
- {
- stream << "[Optional argument]" << std::endl;
- for (const auto &arg : parser._optional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- }
+ print_help_args(parser._optional_arg_vec, "[Optional argument]");
return stream;
}
@@ -737,6 +727,29 @@ template <typename T> T Arser::get(const std::string &arg_name)
return get_impl(arg_name, static_cast<T *>(nullptr));
}
+class Helper
+{
+public:
+ static void add_version(Arser &arser, const std::function<void(void)> &func)
+ {
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(func);
+ }
+
+ static void add_verbose(Arser &arser)
+ {
+ arser.add_argument("-V", "--verbose")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("output additional information to stdout or stderr");
+ }
+};
+
} // namespace arser
#endif // __ARSER_H__
diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt
index 4d86f8097..d5a62301c 100644
--- a/compiler/circle-eval-diff/CMakeLists.txt
+++ b/compiler/circle-eval-diff/CMakeLists.txt
@@ -6,6 +6,7 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_executable(circle-eval-diff ${DRIVER} ${SOURCES})
target_include_directories(circle-eval-diff PRIVATE include)
+target_include_directories(circle-eval-diff PRIVATE src)
target_link_libraries(circle-eval-diff arser)
target_link_libraries(circle-eval-diff safemain)
@@ -17,6 +18,8 @@ target_link_libraries(circle-eval-diff luci_interpreter)
target_link_libraries(circle-eval-diff dio_hdf5)
target_link_libraries(circle-eval-diff vconone)
+install(TARGETS circle-eval-diff DESTINATION bin)
+
if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
@@ -25,10 +28,15 @@ endif(NOT ENABLE_TEST)
# Instead, we use TEST_SOURCES to specify sources used for tests.
set(TEST_SOURCES
"src/MetricPrinter.cpp"
- "src/Tensor.cpp")
+ "src/Tensor.cpp"
+ "src/InputDataLoader.cpp")
nnas_find_package(GTest REQUIRED)
GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_eval_diff_test PRIVATE include)
target_include_directories(circle_eval_diff_test PRIVATE src)
target_link_libraries(circle_eval_diff_test luci_testhelper)
target_link_libraries(circle_eval_diff_test nncc_coverage)
+target_link_libraries(circle_eval_diff_test dio_hdf5)
+target_link_libraries(circle_eval_diff_test loco)
+target_link_libraries(circle_eval_diff_test luci_lang)
diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp
index f4a12a403..7e63ec88c 100644
--- a/compiler/circle-eval-diff/driver/Driver.cpp
+++ b/compiler/circle-eval-diff/driver/Driver.cpp
@@ -30,19 +30,15 @@ std::string to_lower_case(std::string s)
return s;
}
-Metric to_metric(const std::string &str)
-{
- if (to_lower_case(str).compare("mae") == 0)
- return Metric::MAE;
-
- throw std::runtime_error("Unsupported metric.");
-}
-
InputFormat to_input_format(const std::string &str)
{
- if (to_lower_case(str).compare("h5") == 0)
+ auto small_str = to_lower_case(str);
+ if (small_str.compare("h5") == 0)
return InputFormat::H5;
+ if (small_str.compare("directory") == 0 || small_str.compare("dir") == 0)
+ return InputFormat::DIR;
+
throw std::runtime_error("Unsupported input format.");
}
@@ -58,50 +54,50 @@ int entry(const int argc, char **argv)
{
arser::Arser arser("Compare inference results of two circle models");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ arser::Helper::add_version(arser, print_version);
- arser.add_argument("--first_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("First input model filepath");
+ arser.add_argument("--first_model").required(true).help("First input model filepath");
- arser.add_argument("--second_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Second input model filepath");
+ arser.add_argument("--second_model").required(true).help("Second input model filepath");
arser.add_argument("--first_input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Input data filepath for the first model. If not given, circle-eval-diff will run with "
"randomly generated data");
arser.add_argument("--second_input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Input data filepath for the second model. If not given, circle-eval-diff will run with "
"randomly generated data");
- arser.add_argument("--metric")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("MAE")
- .help("Metric for comparison (default: MAE)");
+ arser.add_argument("--dump_output_with_prefix")
+ .help("Dump output to files. <prefix> should be given as an argument. "
+ "Outputs are saved in <prefix>.<data_index>.first.output<output_index> and "
+ "<prefix>.<data_index>.second.output<output_index>.");
+
+ arser.add_argument("--print_mae").nargs(0).default_value(false).help("Print Mean Absolute Error");
+
+ arser.add_argument("--print_mape")
+ .nargs(0)
+ .default_value(false)
+  .help("Print Mean Absolute Percentage Error");
+
+ arser.add_argument("--print_mpeir")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Peak Error to Interval Ratio");
+
+ arser.add_argument("--print_top1_match")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Top-1 Match Ratio");
+
+ arser.add_argument("--print_top5_match")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Top-5 Match Ratio");
+
+ arser.add_argument("--print_mse").nargs(0).default_value(false).help("Print Mean Squared Error");
arser.add_argument("--input_data_format")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("h5")
.help("Input data format. h5/hdf5 (default) or directory");
@@ -124,6 +120,7 @@ int entry(const int argc, char **argv)
std::string second_input_data_path;
std::string metric;
std::string input_data_format;
+ std::string output_prefix;
if (arser["--first_input_data"])
first_input_data_path = arser.get<std::string>("--first_input_data");
@@ -135,22 +132,54 @@ int entry(const int argc, char **argv)
throw std::runtime_error("Input data path should be given for both first_model and "
"second_model, or neither must be given.");
- metric = arser.get<std::string>("--metric");
+ if (arser["--dump_output_with_prefix"])
+ output_prefix = arser.get<std::string>("--dump_output_with_prefix");
+
+ // Set Metrics
+ std::vector<Metric> metrics;
+ if (arser["--print_mae"] and arser.get<bool>("--print_mae"))
+ {
+ metrics.emplace_back(Metric::MAE);
+ }
+ if (arser["--print_mape"] and arser.get<bool>("--print_mape"))
+ {
+ metrics.emplace_back(Metric::MAPE);
+ }
+ if (arser["--print_mpeir"] and arser.get<bool>("--print_mpeir"))
+ {
+ metrics.emplace_back(Metric::MPEIR);
+ }
+ if (arser["--print_top1_match"] and arser.get<bool>("--print_top1_match"))
+ {
+ metrics.emplace_back(Metric::MTOP1);
+ }
+ if (arser["--print_top5_match"] and arser.get<bool>("--print_top5_match"))
+ {
+ metrics.emplace_back(Metric::MTOP5);
+ }
+ if (arser["--print_mse"] and arser.get<bool>("--print_mse"))
+ {
+ metrics.emplace_back(Metric::MSE);
+ }
+
input_data_format = arser.get<std::string>("--input_data_format");
auto ctx = std::make_unique<CircleEvalDiff::Context>();
{
ctx->first_model_path = first_model_path;
ctx->second_model_path = second_model_path;
- ctx->metric = to_metric(metric);
+ ctx->first_input_data_path = first_input_data_path;
+ ctx->second_input_data_path = second_input_data_path;
+ ctx->metric = metrics;
ctx->input_format = to_input_format(input_data_format);
+ ctx->output_prefix = output_prefix;
}
CircleEvalDiff ced(std::move(ctx));
ced.init();
- ced.evalDiff(first_input_data_path, second_input_data_path);
+ ced.evalDiff();
return EXIT_SUCCESS;
}
diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h
index bf6aff46d..7894480ac 100644
--- a/compiler/circle-eval-diff/include/CircleEvalDiff.h
+++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h
@@ -20,8 +20,12 @@
#include <luci/IR/Module.h>
#include <luci_interpreter/Interpreter.h>
+#include "InputDataLoader.h"
+#include "MetricPrinter.h"
+
#include <string>
#include <memory>
+#include <vector>
namespace circle_eval_diff
{
@@ -32,14 +36,12 @@ class ModuleEvalDiff;
enum class Metric
{
Undefined, // For debugging
- MAE,
-};
-
-enum class InputFormat
-{
- Undefined, // For debugging
- H5,
- // TODO Implement Random, Directory
+ MAE, // Mean Absolute Error
+ MAPE, // Mean Percentage Absolute Error
+ MPEIR, // Mean Peak Error to Interval Ratio
+ MTOP1, // Mean Top-1 Match Ratio
+ MTOP5, // Mean Top-5 Match Ratio
+ MSE, // Mean Squared Error
};
class CircleEvalDiff final
@@ -49,8 +51,11 @@ public:
{
std::string first_model_path;
std::string second_model_path;
- Metric metric = Metric::Undefined;
+ std::string first_input_data_path;
+ std::string second_input_data_path;
+ std::vector<Metric> metric;
InputFormat input_format = InputFormat::Undefined;
+ std::string output_prefix;
};
public:
@@ -61,12 +66,13 @@ public:
void init();
// Evaluate two circle models for the given input data and compare the results
- void evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const;
+ void evalDiff(void) const;
private:
std::unique_ptr<Context> _ctx;
- std::unique_ptr<ModuleEvalDiff> _runner;
+ std::unique_ptr<luci::Module> _first_module;
+ std::unique_ptr<luci::Module> _second_module;
+ std::vector<std::unique_ptr<MetricPrinter>> _metrics;
};
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
index c39a11371..43e026bf6 100644
--- a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
+++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
@@ -15,8 +15,9 @@
*/
#include "CircleEvalDiff.h"
-#include "ModuleEvalDiff.h"
+#include "InputDataLoader.h"
#include "MetricPrinter.h"
+#include "Tensor.h"
#include <foder/FileLoader.h>
#include <luci/Importer.h>
@@ -26,6 +27,25 @@
namespace
{
+// Return true iff two nodes have the same rank and identical dimensions.
+bool same_shape(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+  if (a->rank() != b->rank())
+    return false;
+
+  for (uint32_t i = 0; i < a->rank(); i++)
+  {
+    // loco::Dimension provides operator==, not operator!=, hence the negation
+    if (not(a->dim(i) == b->dim(i)))
+      return false;
+  }
+
+  return true;
+}
+
+// Return true iff two nodes have the same data type.
+bool same_dtype(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+  return a->dtype() == b->dtype();
+}
+
std::unique_ptr<luci::Module> import(const std::string &model_path)
{
// Load model from the file
@@ -40,7 +60,12 @@ std::unique_ptr<luci::Module> import(const std::string &model_path)
throw std::runtime_error("Failed to verify circle '" + model_path + "'");
}
- auto module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+ auto circle_model = circle::GetModel(model_data.data());
+
+ if (not circle_model)
+ throw std::runtime_error("Failed to load '" + model_path + "'");
+
+ auto module = luci::Importer().importModule(circle_model);
if (not module)
throw std::runtime_error("Failed to load '" + model_path + "'");
@@ -48,50 +73,192 @@ std::unique_ptr<luci::Module> import(const std::string &model_path)
return module;
}
+// Convenience accessor for a module's graph input nodes.
+const std::vector<loco::Node *> inputs_of(const luci::Module *module)
+{
+  return loco::input_nodes(module->graph());
+}
+
+// Convenience accessor for a module's graph output nodes.
+const std::vector<loco::Node *> outputs_of(const luci::Module *module)
+{
+  return loco::output_nodes(module->graph());
+}
+
+// Write 'data_size' bytes from 'data' into a new binary file at 'filename'.
+// Throws std::runtime_error when the file cannot be opened or written.
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+  std::ofstream fs(filename, std::ofstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.write(data, data_size).fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+// Verify that two modules agree on output count and on the shape and dtype of
+// each corresponding output. Throws std::runtime_error on any mismatch.
+void checkOutputs(const luci::Module *first, const luci::Module *second)
+{
+  const auto first_output = outputs_of(first);
+  const auto second_output = outputs_of(second);
+
+  if (first_output.size() != second_output.size())
+    throw std::runtime_error("Models have different output counts");
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    if (not same_shape(first_node, second_node))
+      throw std::runtime_error("Output shape mismatch (" + first_node->name() + ", " +
+                               second_node->name() + ")");
+
+    if (not same_dtype(first_node, second_node))
+      throw std::runtime_error("Output dtype mismatch (" + first_node->name() + ", " +
+                               second_node->name() + ")");
+  }
+}
+
} // namespace
namespace circle_eval_diff
{
-CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx)
- : _ctx(std::move(ctx)), _runner(nullptr)
+// Run 'module' on one dataset ('data' holds one Tensor per model input) and
+// return the output tensors in graph output order.
+std::vector<std::shared_ptr<Tensor>> interpret(const luci::Module *module,
+                                               const InputDataLoader::Data &data)
+{
+  auto interpreter = std::make_unique<luci_interpreter::Interpreter>(module);
+
+  auto input_nodes = ::inputs_of(module);
+  auto output_nodes = ::outputs_of(module);
+
+  // Feed each tensor of 'data' to the input node with the matching index
+  for (uint32_t input_idx = 0; input_idx < data.size(); input_idx++)
+  {
+    auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+    assert(input_node->index() == input_idx);
+
+    auto input_data = data.at(input_idx);
+    interpreter->writeInputTensor(input_node, input_data.buffer(), input_data.byte_size());
+  }
+
+  interpreter->interpret();
+
+  // Copy every graph output into a freshly allocated Tensor
+  std::vector<std::shared_ptr<Tensor>> outputs;
+  for (uint32_t output_idx = 0; output_idx < output_nodes.size(); output_idx++)
+  {
+    auto output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
+    assert(output_node->index() == output_idx);
+
+    auto tensor = createEmptyTensor(output_node);
+    interpreter->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
+    outputs.emplace_back(tensor);
+  }
+
+  return outputs;
+}
+
+CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) : _ctx(std::move(ctx))
{
+ // DO NOTHING
}
CircleEvalDiff::~CircleEvalDiff() = default;
void CircleEvalDiff::init()
{
+ _first_module = import(_ctx->first_model_path);
+ _second_module = import(_ctx->second_model_path);
+
+ // Check modules have the same output signature (dtype/shape)
+ // Exception will be thrown if they have different signature
+ checkOutputs(_first_module.get(), _second_module.get());
+
// Set metric
std::unique_ptr<MetricPrinter> metric;
- switch (_ctx->metric)
+ for (auto metric : _ctx->metric)
{
- case Metric::MAE:
- metric = std::make_unique<MAEPrinter>();
- break;
- default:
- throw std::runtime_error("Unsupported metric.");
+ switch (metric)
+ {
+ case Metric::MAE:
+ {
+ _metrics.emplace_back(std::make_unique<MAEPrinter>());
+ break;
+ }
+ case Metric::MAPE:
+ {
+ _metrics.emplace_back(std::make_unique<MAPEPrinter>());
+ break;
+ }
+ case Metric::MPEIR:
+ {
+ _metrics.emplace_back(std::make_unique<MPEIRPrinter>());
+ break;
+ }
+ case Metric::MTOP1:
+ {
+ _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(1));
+ break;
+ }
+ case Metric::MTOP5:
+ {
+ _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(5));
+ break;
+ }
+ case Metric::MSE:
+ {
+ _metrics.emplace_back(std::make_unique<MSEPrinter>());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported metric.");
+ }
+ _metrics.back()->init(_first_module.get(), _second_module.get());
}
+}
- auto first_module = import(_ctx->first_model_path);
- auto second_module = import(_ctx->second_model_path);
+void CircleEvalDiff::evalDiff(void) const
+{
+ auto first_input_loader = circle_eval_diff::makeDataLoader(
+ _ctx->first_input_data_path, _ctx->input_format, ::inputs_of(_first_module.get()));
+ auto second_input_loader = circle_eval_diff::makeDataLoader(
+ _ctx->second_input_data_path, _ctx->input_format, ::inputs_of(_second_module.get()));
- // Set runner
- switch (_ctx->input_format)
+ for (uint32_t data_idx = 0; data_idx < first_input_loader->size(); data_idx++)
{
- case InputFormat::H5:
- _runner = std::make_unique<H5InputEvalDiff>(std::move(first_module), std::move(second_module),
- std::move(metric));
- break;
- default:
- throw std::runtime_error("Unsupported input format.");
+ std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
+
+ auto first_data = first_input_loader->get(data_idx);
+ auto second_data = second_input_loader->get(data_idx);
+
+ auto first_output = interpret(_first_module.get(), first_data);
+ auto second_output = interpret(_second_module.get(), second_data);
+
+ for (auto &metric : _metrics)
+ {
+ metric->accumulate(first_output, second_output);
+ }
+
+ if (_ctx.get()->output_prefix.empty())
+ continue;
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ auto out = first_output[i];
+ writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + ".first.output" +
+ std::to_string(i),
+ (char *)(out->buffer()), out->byte_size());
+ }
+ for (uint32_t i = 0; i < second_output.size(); i++)
+ {
+ auto out = second_output[i];
+ writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) +
+ ".second.output" + std::to_string(i),
+ (char *)(out->buffer()), out->byte_size());
+ }
}
-}
-void CircleEvalDiff::evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const
-{
- _runner->evalDiff(first_input_data_path, second_input_data_path);
+ for (auto &metric : _metrics)
+ {
+ std::cout << metric.get() << std::endl;
+ }
}
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.cpp b/compiler/circle-eval-diff/src/InputDataLoader.cpp
new file mode 100644
index 000000000..99276f32a
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InputDataLoader.h"
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <cstring>
+#include <dirent.h>
+#include <fstream>
+#include <vector>
+
+using DataType = loco::DataType;
+using Shape = std::vector<loco::Dimension>;
+
+namespace circle_eval_diff
+{
+
+// Check the type and the shape of CircleInput
+// Throws std::runtime_error if 'dtype' or 'shape' does not match 'input_node'.
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+  // Type check
+  if (dtype != input_node->dtype())
+    throw std::runtime_error("Wrong input type.");
+
+  if (shape.size() != input_node->rank())
+    throw std::runtime_error("Input rank mismatch.");
+
+  for (uint32_t i = 0; i < shape.size(); i++)
+  {
+    // loco::Dimension has no operator!=, hence the negated equality
+    if (not(shape.at(i) == input_node->dim(i)))
+      throw std::runtime_error("Input shape mismatch.");
+  }
+}
+
+// Return the byte size of each given input node, in order.
+// Used by DirectoryLoader to slice one raw data file into per-input buffers.
+std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+  std::vector<size_t> vec;
+
+  for (const auto node : nodes)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    // Seed with the byte size of one element so the result is in bytes,
+    // consistent with getTotalByteSizeOf. (Seeding with 1 returned the
+    // element count, which under-copies inputs with multi-byte dtypes.)
+    size_t byte_size = loco::size(input_node->dtype());
+
+    for (uint32_t index = 0; index < input_node->rank(); index++)
+    {
+      byte_size *= input_node->dim(index).value();
+    }
+
+    vec.push_back(byte_size);
+  }
+
+  return vec;
+}
+
+// Return the summed byte size of all given input nodes
+// (element count times the byte size of each node's dtype).
+size_t getTotalByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+  size_t total_byte_size = 0;
+
+  for (const auto node : nodes)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    size_t byte_size = loco::size(input_node->dtype());
+
+    for (uint32_t index = 0; index < input_node->rank(); index++)
+    {
+      byte_size *= input_node->dim(index).value();
+    }
+
+    total_byte_size += byte_size;
+  }
+
+  return total_byte_size;
+}
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+// Open the HDF5 file at 'file_path' and select its "value" group, which holds
+// the input datasets. HDF5 failures are rethrown as std::runtime_error after
+// printing the HDF5 error stack.
+HDF5Loader::HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes)
+  : _input_nodes{input_nodes}
+{
+  try
+  {
+    using HDF5Importer = dio::hdf5::HDF5Importer;
+
+    _hdf5 = std::make_unique<HDF5Importer>(file_path);
+    _hdf5->importGroup("value");
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    throw std::runtime_error("HDF5 error occurred.");
+  }
+}
+
+// Number of datasets stored in the HDF5 file.
+uint32_t HDF5Loader::size(void) const { return _hdf5->numData(); }
+
+// Load the 'data_idx'-th dataset as one Tensor per model input.
+// For non-raw data, the stored dtype/shape is validated against the model.
+InputDataLoader::Data HDF5Loader::get(uint32_t data_idx) const
+{
+  Data data;
+  data.resize(_input_nodes.size());
+
+  for (uint32_t input_idx = 0; input_idx < _input_nodes.size(); input_idx++)
+  {
+    auto input_node = loco::must_cast<luci::CircleInput *>(_input_nodes.at(input_idx));
+    assert(input_node->index() == input_idx);
+
+    // Allocate the destination tensor sized after the model input
+    data.at(input_idx) = *createEmptyTensor(input_node).get();
+
+    auto input_buffer = data.at(input_idx).buffer();
+    try
+    {
+      if (_hdf5->isRawData())
+      {
+        // Raw data carries no type/shape metadata to verify
+        _hdf5->readTensor(data_idx, input_idx, input_buffer);
+      }
+      else
+      {
+        DataType dtype;
+        Shape shape;
+        _hdf5->readTensor(data_idx, input_idx, &dtype, &shape, input_buffer);
+
+        // Check the type and the shape of the input data is valid
+        verifyTypeShape(input_node, dtype, shape);
+      }
+    }
+    catch (const H5::Exception &e)
+    {
+      H5::Exception::printErrorStack();
+      throw std::runtime_error("HDF5 error occurred.");
+    }
+  }
+
+  return data;
+}
+
+// Collect the paths of all regular files under 'dir_path'; each file is one
+// raw input dataset. Throws std::runtime_error if the directory cannot be
+// opened.
+// NOTE readdir() returns entries in an unspecified order, so dataset indices
+// are not guaranteed to be stable across filesystems.
+DirectoryLoader::DirectoryLoader(const std::string &dir_path,
+                                 const std::vector<loco::Node *> &input_nodes)
+  : _input_nodes{input_nodes}
+{
+  DIR *dir = opendir(dir_path.c_str());
+  if (not dir)
+  {
+    throw std::runtime_error("Cannot open directory \"" + dir_path + "\".");
+  }
+
+  struct dirent *entry = nullptr;
+  // Extra parentheses make the assignment-as-condition explicit
+  while ((entry = readdir(dir)) != nullptr)
+  {
+    // Skip if the entry is not a regular file
+    if (entry->d_type != DT_REG)
+      continue;
+
+    _data_paths.push_back(dir_path + "/" + entry->d_name);
+  }
+
+  closedir(dir);
+}
+
+// Number of raw data files found in the directory.
+uint32_t DirectoryLoader::size(void) const { return _data_paths.size(); }
+
+// Read the 'data_idx'-th raw file in one shot, then split the buffer into one
+// Tensor per model input using the per-input sizes from getEachByteSizeOf.
+InputDataLoader::Data DirectoryLoader::get(uint32_t data_idx) const
+{
+  // Read raw data
+  const auto input_total_bytes = getTotalByteSizeOf(_input_nodes);
+  std::vector<char> input_data(input_total_bytes);
+  const auto raw_data_path = _data_paths.at(data_idx);
+  std::ifstream fs(raw_data_path, std::ifstream::binary);
+
+  if (fs.fail())
+  {
+    throw std::runtime_error("Cannot open file \"" + raw_data_path + "\".");
+  }
+  if (fs.read(input_data.data(), input_total_bytes).fail())
+  {
+    throw std::runtime_error("Failed to read raw data from file \"" + raw_data_path + "\".");
+  }
+
+  // Make Tensor from raw data
+  auto input_data_cur = input_data.data();
+
+  Data data;
+  data.resize(_input_nodes.size());
+  std::vector<size_t> input_bytes = getEachByteSizeOf(_input_nodes);
+  for (uint32_t index = 0; index < _input_nodes.size(); index++)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(_input_nodes.at(index));
+    auto &tensor = data.at(index);
+    tensor = *createEmptyTensor(input_node).get();
+    auto buffer = tensor.buffer();
+    // Copy this input's slice and advance the cursor to the next slice
+    std::memcpy(buffer, input_data_cur, input_bytes.at(index));
+    input_data_cur += input_bytes.at(index);
+  }
+
+  return data;
+}
+
+// Factory: build the InputDataLoader implementation matching 'format'
+// (H5 -> HDF5Loader, DIR -> DirectoryLoader). Throws on unsupported formats.
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+                                                const InputFormat &format,
+                                                const std::vector<loco::Node *> &input_nodes)
+{
+  switch (format)
+  {
+    case InputFormat::H5:
+    {
+      return std::make_unique<HDF5Loader>(file_path, input_nodes);
+    }
+    case InputFormat::DIR:
+    {
+      return std::make_unique<DirectoryLoader>(file_path, input_nodes);
+    }
+    default:
+      throw std::runtime_error{"Unsupported input format."};
+  }
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.h b/compiler/circle-eval-diff/src/InputDataLoader.h
new file mode 100644
index 000000000..14921b239
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+#define __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Node.h>
+#include <luci/IR/CircleNodes.h>
+
+#include "Tensor.h"
+
+#include <memory>
+#include <string>
+
+namespace circle_eval_diff
+{
+
+void verifyTypeShape(const luci::CircleInput *input_node, const loco::DataType &dtype,
+ const std::vector<loco::Dimension> &shape);
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+// Supported sources of input data.
+enum class InputFormat
+{
+  Undefined, // For debugging
+  H5,        // single HDF5 file
+  DIR,       // directory of raw binary files
+  // TODO Implement Random
+};
+
+// Abstract source of input datasets. One Data is a full set of input tensors
+// for a single inference (one Tensor per model input).
+class InputDataLoader
+{
+public:
+  using Data = std::vector<Tensor>;
+
+public:
+  virtual ~InputDataLoader() = default;
+
+public:
+  // Number of datasets available
+  virtual uint32_t size(void) const = 0;
+
+public:
+  // Load the 'data_idx'-th dataset
+  virtual Data get(uint32_t data_idx) const = 0;
+};
+
+// Loads input datasets from a single HDF5 file (group "value").
+class HDF5Loader final : public InputDataLoader
+{
+public:
+  HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+  uint32_t size(void) const final;
+  Data get(uint32_t data_idx) const final;
+
+private:
+  const std::vector<loco::Node *> _input_nodes;
+  std::unique_ptr<dio::hdf5::HDF5Importer> _hdf5;
+};
+
+// This class loads the directory that has raw data binary files.
+class DirectoryLoader final : public InputDataLoader
+{
+public:
+  DirectoryLoader(const std::string &dir_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+  uint32_t size(void) const final;
+  Data get(uint32_t data_idx) const final;
+
+private:
+  const std::vector<loco::Node *> _input_nodes;
+  std::vector<std::string> _data_paths;
+};
+
+// Factory for the loader matching 'format' (H5 or DIR).
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+                                                const InputFormat &format,
+                                                const std::vector<loco::Node *> &input_nodes);
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.test.cpp b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
new file mode 100644
index 000000000..cbe78797b
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include "InputDataLoader.h"
+
+using namespace circle_eval_diff;
+
+// Positive case: matching dtype and shape must pass verification.
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(3);
+  input.dim(2).set(3);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  std::vector<loco::Dimension> right_shape;
+  right_shape.emplace_back(1);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(2);
+
+  EXPECT_NO_THROW(verifyTypeShape(&input, right_data_type, right_shape));
+}
+
+// Negative case: a shape mismatch must throw, with either dtype.
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest_NEG)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(4);
+  input.dim(2).set(4);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  loco::DataType wrong_data_type{loco::DataType::FLOAT16};
+  std::vector<loco::Dimension> wrong_shape;
+  wrong_shape.emplace_back(1);
+  wrong_shape.emplace_back(3);
+  wrong_shape.emplace_back(3);
+  wrong_shape.emplace_back(2);
+
+  EXPECT_ANY_THROW(verifyTypeShape(&input, right_data_type, wrong_shape));
+  EXPECT_ANY_THROW(verifyTypeShape(&input, wrong_data_type, wrong_shape));
+}
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp
index d65eb9b63..ec8408471 100644
--- a/compiler/circle-eval-diff/src/MetricPrinter.cpp
+++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp
@@ -18,6 +18,7 @@
#include <luci/IR/CircleNode.h>
+#include <limits>
#include <iostream>
#include <cassert>
@@ -30,6 +31,16 @@ using Tensor = circle_eval_diff::Tensor;
namespace
{
+// Total number of elements in a node's shape (product of all dimensions).
+uint32_t num_elems(const luci::CircleNode *node)
+{
+  uint32_t res = 1;
+
+  for (uint32_t i = 0; i < node->rank(); i++)
+    res *= node->dim(i).value();
+
+  return res;
+}
+
template <typename T> bool same_shape(const T a, const T b)
{
if (a->rank() != b->rank())
@@ -44,6 +55,8 @@ template <typename T> bool same_shape(const T a, const T b)
return true;
}
+template <typename T> bool same_dtype(const T a, const T b) { return a->dtype() == b->dtype(); }
+
template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor)
{
assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS
@@ -97,7 +110,6 @@ void MAEPrinter::init(const luci::Module *first, const luci::Module *second)
{
const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
- assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS
// Create tensors to store intermediate results
_intermediate.emplace_back();
@@ -180,6 +192,471 @@ void MAEPrinter::dump(std::ostream &os) const
}
}
+// TODO Remove duplicate codes with MAEPrinter
+// Allocate one zero-initialized fp32 accumulator tensor per model output and
+// record output names for later reporting.
+void MAPEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    // Create tensors to store intermediate results
+    _intermediate.emplace_back();
+    _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+    // NOTE Use both first_node and second_node to avoid release build break
+    _intermediate.at(i).rank(first_node->rank());
+    uint32_t num_elems = 1;
+    for (uint32_t j = 0; j < second_node->rank(); j++)
+    {
+      _intermediate.at(i).dim(j) = second_node->dim(j);
+      num_elems *= second_node->dim(j).value();
+    }
+    _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initilized with zero
+    for (uint32_t j = 0; j < num_elems; j++)
+      assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+// Accumulate |(a - b) / a|
+// NOTE(review): there is no guard for a_val == 0 — the division yields
+// inf/NaN which then poisons the accumulated MAPE. Presumably the reference
+// outputs are assumed to be non-zero; confirm with callers.
+void MAPEPrinter::accum_mean_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                            const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+    _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+      std::abs((a_val - b_val) / a_val);
+  }
+}
+
+// Assumption
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+// Accumulate the per-element absolute percentage error of one dataset and
+// bump the dataset counter used for averaging in dump().
+void MAPEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                             const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute error
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_mean_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+// Report MAPE per output: the accumulated sum averaged over elements and
+// datasets, expressed in percent.
+void MAPEPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Absolute Percentage Error (MAPE)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto &inter = _intermediate.at(output_idx);
+    assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+    const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+    // Compute MAPE
+    float mape = 0.0;
+    for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+      mape += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+    mape = mape / elem_count;
+    mape = mape / _num_data;
+    mape *= 100.0;
+
+    os << "MAPE for " << name << " is " << mape << "%" << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+// Allocate one scalar accumulator per model output and record output names.
+void MPEIRPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+    // NOTE(review): second_node is unused here — presumably kept for symmetry
+    // with the other printers; may trigger an unused-variable warning.
+    const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+    // Create places to store intermediate results
+    _intermediate.emplace_back(0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+// Accumulate PEIR (Peak Error to Interval Ratio)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+void MPEIRPrinter::accum_peir(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                              const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  // First pass: find the value interval of the reference tensor 'a'
+  float min = std::numeric_limits<float>::max();
+  float max = std::numeric_limits<float>::lowest();
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    min = std::min(a_val, min);
+    max = std::max(a_val, max);
+  }
+
+  float interval = max - min;
+
+  // Corner case: All values are the same. We set interval = 1 in this case
+  if (interval == 0)
+    interval = 1.0;
+
+  // Second pass: find the peak absolute error between 'a' and 'b'
+  float peak_error = std::numeric_limits<float>::lowest();
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+    const auto error = std::abs(a_val - b_val);
+    peak_error = std::max(error, peak_error);
+  }
+
+  _intermediate.at(output_idx) += peak_error / interval;
+}
+
+// Assumption (when testing the accuracy of quantized model)
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+// Accumulate one dataset's PEIR per output and bump the dataset counter.
+void MPEIRPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                              const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 for ease of computation
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_peir(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+// Report MPEIR per output: accumulated PEIR averaged over datasets.
+void MPEIRPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Peak Error to Interval Ratio (MPEIR)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto sum_of_peir = _intermediate.at(output_idx);
+
+    // Compute MPEIR
+    float mpeir = sum_of_peir / _num_data;
+
+    os << "MPEIR for " << name << " is " << mpeir << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+// Allocate one scalar accumulator per model output, record output names, and
+// put outputs with fewer than k elements on the skip list (Top-k match is
+// undefined for them).
+void TopKMatchPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+    // Create places to store intermediate results
+    _intermediate.emplace_back(0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+
+    // If num_elems of an output is less than k,
+    // the output index is added to the skip list
+    if (num_elems(first_node) < _k)
+    {
+      // NOTE space added before "metric" (message used to print "Top-5metric")
+      std::cout << "Top-" << _k << " metric for " << first_node->name()
+                << " is ignored, because it has elements less than " << _k << std::endl;
+      _skip_output.emplace_back(i);
+    }
+  }
+}
+
+// Accumulate the ratio of Top-k indices shared between 'a' and 'b'
+// (1.0 when both tensors rank the same k elements highest, 0.0 when none
+// match). Ties are broken toward the earlier index.
+void TopKMatchPrinter::accum_topk_accuracy(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                           const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  // Find Top-k largest elements
+  // This implementation is a variant of "Method 2 (Use temporary array)" in
+  // https://www.geeksforgeeks.org/k-largestor-smallest-elements-in-an-array/
+  // We sort top-k elements by value and index to ensure that the element with an earlier
+  // index comes first if multiple elements have the same value.
+  auto find_topk = [this](const std::shared_ptr<Tensor> &tensor) {
+    assert(_k <= tensor->size<loco::DataType::FLOAT32>()); // FIX_CALLER_UNLESS
+
+    // first: value, second: index
+    std::vector<std::pair<float, uint32_t>> topk;
+    topk.resize(_k);
+
+    // Initialize
+    for (uint32_t i = 0; i < _k; i++)
+    {
+      topk[i] = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+    }
+
+    // Input pair: (value, index)
+    // Return true if a has smaller value than b. If a and b have the same value,
+    // return true if a has larger index.
+    auto compare = [](const std::pair<float, uint32_t> &a, const std::pair<float, uint32_t> &b) {
+      if (a.first == b.first)
+        return a.second > b.second;
+
+      return a.first < b.first;
+    };
+
+    for (uint32_t i = _k; i < tensor->size<loco::DataType::FLOAT32>(); i++)
+    {
+      auto val = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+
+      auto min = std::min_element(topk.begin(), topk.end(), compare);
+      if (compare(*min, val))
+      {
+        // val is larger than min. Replace min with val.
+        auto min_index = std::distance(topk.begin(), min);
+        topk[min_index] = val;
+      }
+    }
+
+    return topk;
+  };
+
+  auto first_topk = find_topk(a);
+  auto second_topk = find_topk(b);
+
+  // Count how many Top-k indices of 'a' also appear in Top-k of 'b'
+  uint32_t matched = 0;
+  for (uint32_t i = 0; i < _k; i++)
+  {
+    for (uint32_t j = 0; j < _k; j++)
+    {
+      if (first_topk[i].second == second_topk[j].second)
+      {
+        matched++;
+        break;
+      }
+    }
+  }
+
+  float matched_ratio = static_cast<float>(matched) / _k;
+
+  _intermediate.at(output_idx) += matched_ratio;
+}
+
+bool TopKMatchPrinter::in_skip_list(uint32_t output_index) const
+{
+ for (auto skip : _skip_output)
+ {
+ if (output_index == skip)
+ return true;
+ }
+
+ return false;
+}
+
+void TopKMatchPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ if (in_skip_list(output_idx))
+ continue;
+
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 for ease of computation
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_topk_accuracy(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void TopKMatchPrinter::dump(std::ostream &os) const
+{
+ os << "Ratio of Matched Indices between Top-" << _k << " results of the models" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ if (in_skip_list(output_idx))
+ continue;
+
+ const auto name = _output_names.at(output_idx);
+ const auto sum_of_topk_accuracy = _intermediate.at(output_idx);
+
+ // Compute TopKMatch
+ float mean_topk = sum_of_topk_accuracy / _num_data;
+
+ os << "Mean Top-" << _k << " match ratio for " << name << " is " << mean_topk << std::endl;
+ }
+}
+
+void MSEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+ // Create tensors to store intermediate results
+ _intermediate.emplace_back();
+ _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+ // NOTE Use both first_node and second_node to avoid release build break
+ _intermediate.at(i).rank(first_node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t j = 0; j < second_node->rank(); j++)
+ {
+ _intermediate.at(i).dim(j) = second_node->dim(j);
+ num_elems *= second_node->dim(j).value();
+ }
+ _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initialized with zero
+ for (uint32_t j = 0; j < num_elems; j++)
+ assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+void MSEPrinter::accum_squared_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+ (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)) *
+ (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+void MSEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 and then compute absolute error
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_squared_error(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MSEPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Squared Error (MSE)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto &inter = _intermediate.at(output_idx);
+ assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+ const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+ // Compute MSE
+ float mse = 0.0;
+ for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+ mse += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+ mse = mse / elem_count;
+ mse = mse / _num_data;
+
+ os << "MSE for " << name << " is " << mse << std::endl;
+ }
+}
+
} // namespace circle_eval_diff
#undef THROW_UNLESS
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h
index b51581c31..c8f27511c 100644
--- a/compiler/circle-eval-diff/src/MetricPrinter.h
+++ b/compiler/circle-eval-diff/src/MetricPrinter.h
@@ -85,6 +85,133 @@ private:
uint32_t _num_data = 0;
};
+// Mean Squared Error
+class MSEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_squared_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of squared error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Absolute Percentage Error
+class MAPEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_mean_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute percentage error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Peak Error to Interval Ratio (PEIR)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+//
+// When testing the accuracy of quantized model,
+// the first model should be the original fp32 model, and
+// the second model should be the fake-quantized fp32 model
+class MPEIRPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_peir(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+ // Store accumulated sum of PEIR for each output
+ std::vector<float> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Ratio of matched indices between top-k results of two models (a, b).
+//
+// top-k match = intersection(top_k_idx(a), top_k_idx(b)) / k
+// mean top-k match = sum(top-k match) / num_data
+//
+// For example,
+// num_data = 2
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+//
+// if k = 1,
+// first model top-1 index = ([2], [1])
+// second model top-1 index = ([2], [0])
+// mean top-1 accuracy = (1 + 0) / 2 = 0.5
+//
+// if k = 2,
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+// first model top-2 index = ([2, 1], [1, 0])
+// second model top-2 index = ([2, 1], [0, 1])
+// mean top-2 accuracy = (2 + 2) / 4 = 1
+//
+// NOTE Order of elements is ignored when comparing two top-k sets.
+// NOTE If two elements have the same value and only one can be included in top-k,
+// the one with an earlier index will be included.
+class TopKMatchPrinter : public MetricPrinter
+{
+public:
+ TopKMatchPrinter(uint32_t k) : _k(k) {}
+
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_topk_accuracy(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+ // Return true if the output is in the skip list (_skip_output)
+ bool in_skip_list(uint32_t output_index) const;
+
+private:
+ const uint32_t _k = 0;
+ // Store accumulated accuracy
+ std::vector<float> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+ // Save index of output whose num_elements is less than k
+ std::vector<uint32_t> _skip_output;
+};
+
} // namespace circle_eval_diff
#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
index 51ca89799..0e71b80cc 100644
--- a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
+++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
@@ -180,6 +180,23 @@ std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, flo
return tensor;
}
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module,
+ std::vector<float> &value)
+{
+ auto outputs = loco::output_nodes(module->graph());
+ assert(outputs.size() == 1);
+ auto output = *outputs.begin();
+ auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+ auto tensor = create_empty_tensor(output_cnode);
+ auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+ assert(tensor_size == value.size());
+ for (uint32_t i = 0; i < tensor_size; i++)
+ {
+ tensor->at<loco::DataType::FLOAT32>(i) = value[i];
+ }
+ return tensor;
+}
+
} // namespace
namespace circle_eval_diff
@@ -233,4 +250,299 @@ TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG)
EXPECT_ANY_THROW(mae.init(nullptr, nullptr));
}
+TEST(CircleEvalMetricPrinterTest, MAPE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MAPEPrinter mape;
+
+ mape.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 2.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 1.0);
+ second_result.emplace_back(output);
+ }
+
+ mape.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mape.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MAPE for output_0 is 50%"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAPE_init_with_null_NEG)
+{
+ MAPEPrinter mape;
+
+ EXPECT_ANY_THROW(mape.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MPEIRPrinter mpeir;
+
+ mpeir.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 0.0);
+ second_result.emplace_back(output);
+ }
+
+ mpeir.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mpeir.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MPEIR for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_init_with_null_NEG)
+{
+ MPEIRPrinter mpeir;
+
+ EXPECT_ANY_THROW(mpeir.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top5(5);
+
+ top5.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i * 2;
+ auto output = output_tensor_with_value(&second, val);
+ second_result.emplace_back(output);
+ }
+
+ top5.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top5.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_tie)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top5(5);
+
+ top5.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ std::vector<float> val{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 16};
+
+ auto output = output_tensor_with_value(&second, val);
+ second_result.emplace_back(output);
+ }
+
+ top5.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top5.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 0.8"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_num_elem_less_than_k_NEG)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top100(100);
+
+ top100.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 0);
+ second_result.emplace_back(output);
+ }
+
+ top100.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top100.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_EQ(std::string::npos, result.find("Mean Top-100 match ratio"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_init_with_null_NEG)
+{
+ TopKMatchPrinter topk(5);
+
+ EXPECT_ANY_THROW(topk.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MSEPrinter mse;
+
+ mse.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 1.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 2.0);
+ second_result.emplace_back(output);
+ }
+
+ mse.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mse.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MSE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_init_with_null_NEG)
+{
+ MSEPrinter mse;
+
+ EXPECT_ANY_THROW(mse.init(nullptr, nullptr));
+}
+
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
deleted file mode 100644
index 85f985873..000000000
--- a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ModuleEvalDiff.h"
-#include "Tensor.h"
-
-#include <luci_interpreter/Interpreter.h>
-#include <dio_hdf5/HDF5Importer.h>
-
-#include <string>
-#include <stdexcept>
-#include <iostream>
-#include <cassert>
-
-using Tensor = circle_eval_diff::Tensor;
-using DataType = loco::DataType;
-using Shape = std::vector<loco::Dimension>;
-using HDF5Importer = dio::hdf5::HDF5Importer;
-
-namespace
-{
-
-// Check the type and the shape of CircleInput
-void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
-{
- // Type check
- if (dtype != input_node->dtype())
- throw std::runtime_error("Wrong input type.");
-
- if (shape.size() != input_node->rank())
- throw std::runtime_error("Input rank mismatch.");
-
- for (uint32_t i = 0; i < shape.size(); i++)
- {
- if (not(shape.at(i) == input_node->dim(i)))
- throw std::runtime_error("Input shape mismatch.");
- }
-}
-
-// Return number of elements of the node.
-uint32_t numElements(const luci::CircleNode *node)
-{
- uint32_t num_elem = 1;
- for (uint32_t i = 0; i < node->rank(); ++i)
- num_elem *= node->dim(i).value();
- return num_elem;
-}
-
-// Return Tensor which has the same dtype and shape with node.
-// Buffer does not have any data yet.
-std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
-{
- auto tensor = std::make_shared<Tensor>();
- {
- tensor->dtype(node->dtype());
- tensor->rank(node->rank());
- for (uint32_t i = 0; i < node->rank(); i++)
- tensor->dim(i) = node->dim(i);
-
- switch (node->dtype())
- {
- case loco::DataType::FLOAT32:
- tensor->size<loco::DataType::FLOAT32>(numElements(node));
- break;
- case loco::DataType::U8:
- tensor->size<loco::DataType::U8>(numElements(node));
- break;
- case loco::DataType::S16:
- tensor->size<loco::DataType::S16>(numElements(node));
- break;
- case loco::DataType::S32:
- tensor->size<loco::DataType::S32>(numElements(node));
- break;
- case loco::DataType::S64:
- tensor->size<loco::DataType::S64>(numElements(node));
- break;
- default:
- throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
- }
- }
-
- return tensor;
-}
-
-} // namespace
-
-namespace circle_eval_diff
-{
-
-void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const
-{
- const auto interp = std::make_unique<luci_interpreter::Interpreter>(_first_module.get());
-
- _metric->init(_first_module.get(), _second_module.get());
-
- try
- {
- HDF5Importer first_h5(first_input_data_path);
- first_h5.importGroup("value");
-
- HDF5Importer second_h5(second_input_data_path);
- second_h5.importGroup("value");
-
- const auto first_num_data = first_h5.numData();
- const auto second_num_data = second_h5.numData();
-
- if (first_num_data != second_num_data)
- throw std::runtime_error(
- "Number of data in the first data file and the second data file mismatches.");
-
- if (first_num_data == 0)
- throw std::runtime_error("Input data file does not contain any record.");
-
- const auto first_input_nodes = loco::input_nodes(_first_module->graph());
- const auto first_num_inputs = first_input_nodes.size();
- const auto first_output_nodes = loco::output_nodes(_first_module->graph());
- const auto first_num_outputs = first_output_nodes.size();
-
- const auto second_input_nodes = loco::input_nodes(_second_module->graph());
- const auto second_num_inputs = second_input_nodes.size();
- const auto second_output_nodes = loco::output_nodes(_second_module->graph());
- const auto second_num_outputs = second_output_nodes.size();
-
- for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++)
- {
- std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
-
- if (first_num_inputs != first_h5.numInputs(data_idx) ||
- second_num_inputs != second_h5.numInputs(data_idx))
- throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) +
- "th data.");
-
- // Do inference and return output
- auto eval = [&](HDF5Importer &h5, uint32_t num_inputs,
- const std::vector<loco::Node *> &input_nodes, uint32_t num_outputs,
- const std::vector<loco::Node *> &output_nodes) {
- // Write input data
- for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
- {
- const auto *input_node =
- loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
- assert(input_node->index() == input_idx);
-
- auto tensor = createEmptyTensor(input_node);
- if (h5.isRawData())
- {
- h5.readTensor(data_idx, input_idx, tensor->buffer());
- }
- else
- {
- DataType dtype;
- Shape shape;
- h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer());
-
- // Check the type and the shape of the input data is valid
- verifyTypeShape(input_node, dtype, shape);
- }
-
- interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size());
- }
-
- // Interpret
- interp->interpret();
-
- // Read output data
- std::vector<std::shared_ptr<Tensor>> outputs;
- for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++)
- {
- const auto *output_node =
- loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
- assert(output_node->index() == output_idx);
-
- auto tensor = createEmptyTensor(output_node);
- interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
- outputs.emplace_back(tensor);
- }
-
- return outputs;
- };
-
- auto first_output =
- eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes);
- auto second_output = eval(second_h5, second_num_inputs, second_input_nodes,
- second_num_outputs, second_output_nodes);
-
- // Accumulate diffs
- _metric->accumulate(first_output, second_output);
- }
-
- std::cout << "Evaluation finished. Number of data: " << first_num_data << std::endl;
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- throw std::runtime_error("HDF5 error occurred.");
- }
-
- // Print metric
- std::cout << _metric.get() << std::endl;
-}
-
-} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h
deleted file mode 100644
index c7642f60b..000000000
--- a/compiler/circle-eval-diff/src/ModuleEvalDiff.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
-#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
-
-#include "MetricPrinter.h"
-
-#include <luci/IR/Module.h>
-
-#include <memory>
-
-namespace circle_eval_diff
-{
-
-class ModuleEvalDiff
-{
-public:
- ModuleEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
- std::unique_ptr<MetricPrinter> &&metric)
- : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric))
- {
- }
-
- virtual ~ModuleEvalDiff() = default;
-
- // Implement this in the child class
- virtual void evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const = 0;
-
-protected:
- std::unique_ptr<luci::Module> _first_module;
- std::unique_ptr<luci::Module> _second_module;
- std::unique_ptr<MetricPrinter> _metric;
-};
-
-class H5InputEvalDiff final : public ModuleEvalDiff
-{
-public:
- H5InputEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
- std::unique_ptr<MetricPrinter> &&metric)
- : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric))
- {
- }
-
- void evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const;
-};
-
-// TODO Implement ModuleEvalDiff for random input and directory input
-
-} // namespace circle_eval_diff
-
-#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp
index 6710e8c3d..c3efc44cd 100644
--- a/compiler/circle-eval-diff/src/Tensor.cpp
+++ b/compiler/circle-eval-diff/src/Tensor.cpp
@@ -16,8 +16,24 @@
#include "Tensor.h"
+#include <luci/IR/CircleNodeDecl.h>
+
#include <cassert>
+namespace
+{
+
+// Return number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ num_elem *= node->dim(i).value();
+ return num_elem;
+}
+
+} // namespace
+
namespace circle_eval_diff
{
@@ -69,4 +85,40 @@ INSTANTIATE(loco::DataType::FLOAT32);
#undef INSTANTIATE
+// Return Tensor which has the same dtype and shape with node.
+// Buffer does not have any data yet.
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
+{
+ auto tensor = std::make_shared<Tensor>();
+ {
+ tensor->dtype(node->dtype());
+ tensor->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ tensor->dim(i) = node->dim(i);
+
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ tensor->size<loco::DataType::FLOAT32>(numElements(node));
+ break;
+ case loco::DataType::U8:
+ tensor->size<loco::DataType::U8>(numElements(node));
+ break;
+ case loco::DataType::S16:
+ tensor->size<loco::DataType::S16>(numElements(node));
+ break;
+ case loco::DataType::S32:
+ tensor->size<loco::DataType::S32>(numElements(node));
+ break;
+ case loco::DataType::S64:
+ tensor->size<loco::DataType::S64>(numElements(node));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
+ }
+ }
+
+ return tensor;
+}
+
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h
index 65ab60638..d4f65d951 100644
--- a/compiler/circle-eval-diff/src/Tensor.h
+++ b/compiler/circle-eval-diff/src/Tensor.h
@@ -18,6 +18,7 @@
#define __CIRCLE_EVAL_DIFF_TENSOR_H__
#include <loco.h>
+#include <luci/IR/CircleNodeDecl.h>
#include <vector>
@@ -76,6 +77,8 @@ private:
std::vector<uint8_t> _data;
};
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node);
+
} // namespace circle_eval_diff
#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp
index 3bdeaecdf..395865748 100644
--- a/compiler/circle-eval-diff/src/Tensor.test.cpp
+++ b/compiler/circle-eval-diff/src/Tensor.test.cpp
@@ -18,6 +18,8 @@
#include <gtest/gtest.h>
+#include <luci/IR/CircleNodes.h>
+
using Tensor = circle_eval_diff::Tensor;
namespace
@@ -99,3 +101,29 @@ TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG)
SUCCEED();
}
+
+TEST(CircleEvalDiffTensorTest, createEmptyTensorTest)
+{
+ luci::CircleInput input;
+ input.dtype(loco::DataType::FLOAT32);
+ input.rank(4);
+ input.dim(0).set(1);
+ input.dim(1).set(3);
+ input.dim(2).set(3);
+ input.dim(3).set(2);
+
+ loco::DataType right_data_type{loco::DataType::FLOAT32};
+ std::vector<loco::Dimension> right_shape;
+ right_shape.emplace_back(1);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(2);
+
+ auto tensor = circle_eval_diff::createEmptyTensor(&input);
+ EXPECT_EQ(loco::DataType::FLOAT32, tensor->dtype());
+ EXPECT_EQ(4, tensor->rank());
+ EXPECT_EQ(1, tensor->dim(0));
+ EXPECT_EQ(3, tensor->dim(1));
+ EXPECT_EQ(3, tensor->dim(2));
+ EXPECT_EQ(2, tensor->dim(3));
+}
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
index 2f657c171..da74e021d 100644
--- a/compiler/circle-execution-plan/CMakeLists.txt
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -1,3 +1,9 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+ message(STATUS "Build circle-execution-plan: FAILED (missing jsoncpp)")
+ return()
+endif(NOT Jsoncpp_FOUND)
+
set(SOURCES
pal/IScratchpadHelper.h
pal/ScratchpadHelperLinux.h
@@ -10,6 +16,9 @@ set(SOURCES
)
add_executable(circle_execution_plan "${SOURCES}")
+target_include_directories(circle_execution_plan PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+
+target_link_libraries(circle_execution_plan ${Jsoncpp_STATIC_LIB})
target_link_libraries(circle_execution_plan foder)
target_link_libraries(circle_execution_plan safemain)
target_link_libraries(circle_execution_plan luci_env)
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
index 1788124c3..d5ddf0ce9 100644
--- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -33,20 +33,22 @@ int entry(int argc, char **argv)
{
arser::Arser arser("circle_execution_plan provides model with execution plan meta information");
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
- arser.add_argument("--platform")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("linux")
- .help("Platform name: linux mcu cmsisnn");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
+ arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn");
arser.add_argument("--use_dsp")
.nargs(1)
.type(arser::DataType::BOOL)
.required(false)
.default_value(false)
.help("Plan with or without dsp (now can be used only with cmsisnn)");
+ arser.add_argument("--save_allocations")
+ .nargs(1)
+ .required(false)
+ .default_value("")
+ .help("Path for output JSON file to save memory allocation info. "
+ "Note: path end of file should have 'tracealloc.json' (example path: "
+ "'../exec_plan_info.tracealloc.json')");
try
{
@@ -63,6 +65,7 @@ int entry(int argc, char **argv)
const std::string output_path = arser.get<std::string>("output");
const std::string platform_name = arser.get<std::string>("--platform");
const bool use_dsp = arser.get<bool>("--use_dsp");
+ const std::string json_path = arser.get<std::string>("--save_allocations");
if (platform_name != "cmsisnn" && use_dsp)
{
@@ -89,6 +92,13 @@ int entry(int argc, char **argv)
return EXIT_FAILURE;
}
+ bool is_save_allocations = false;
+
+ if (!json_path.empty())
+ {
+ is_save_allocations = true;
+ }
+
foder::FileLoader file_loader{input_path};
std::vector<char> model_data;
@@ -124,6 +134,9 @@ int entry(int argc, char **argv)
circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
execution_planner.make_execution_plan();
+ if (is_save_allocations)
+ execution_planner.create_json_allocation_file(json_path);
+
// Export to output Circle file
luci::CircleExporter exporter;
luci::CircleFileExpContract contract(module.get(), output_path);
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
index ec2ec1362..a1e6f7e1a 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -18,6 +18,9 @@
#include <loco/IR/Algorithm.h>
#include <luci/UserSettings.h>
+#include <json.h>
+#include <fstream>
+
namespace circle_planner
{
namespace
@@ -58,6 +61,29 @@ bool isTensorProducingNode(const luci::CircleNode *node)
}
}
+// Append allocation info for the given circle node to the JSON allocations array
+void create_allocation_node(Json::Value &allocations_node,
+ AllocationNodeInformation &alloca_node_inform, uint32_t alive_till_max,
+ luci::CircleNode *circle_node)
+{
+ Json::Value allocation_node;
+ if (alloca_node_inform.size == 0)
+ return;
+
+ allocation_node["offset"] = alloca_node_inform.offset;
+ allocation_node["size"] = alloca_node_inform.size;
+ allocation_node["alive_from"] = alloca_node_inform.first_node;
+
+ if (alloca_node_inform.last_node == node_not_assigned)
+ allocation_node["alive_till"] = alive_till_max + 1;
+ else
+ allocation_node["alive_till"] = alloca_node_inform.last_node;
+
+ allocation_node["origin"] = circle_node->name();
+
+ allocations_node.append(allocation_node);
+}
+
} // namespace
void ExecutionPlanner::make_execution_plan()
@@ -74,6 +100,50 @@ void ExecutionPlanner::make_execution_plan()
settings->set(luci::UserSettings::Key::ExecutionPlanGen, true);
}
+void ExecutionPlanner::create_json_allocation_file(const std::string &json_path)
+{
+ Json::Value main_tree;
+ Json::Value segments_node;
+ Json::Value allocations_node;
+
+ uint32_t alive_till_max = 0;
+
+ // Find max dealloc value to assign to nodes with node_not_assigned value
+ for (const auto elem : _dealloc_node)
+ {
+ if (alive_till_max < elem and elem != node_not_assigned)
+ alive_till_max = elem;
+ }
+
+ for (auto &alloc_node_inform : _alloc_node_inform_vector)
+ {
+ const auto node_num = alloc_node_inform.node_num;
+ const auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[node_num]);
+
+ create_allocation_node(allocations_node, alloc_node_inform, alive_till_max, circle_node);
+ }
+
+ // Create segment part
+ Json::Value segment_node;
+ segment_node["name"] = "Segment1";
+ segment_node["allocations"] = allocations_node;
+ segments_node.append(segment_node);
+
+ main_tree["schema_version"] = 1;
+ main_tree["segments"] = segments_node;
+
+ Json::StreamWriterBuilder builder;
+ const std::unique_ptr<Json::StreamWriter> writer(builder.newStreamWriter());
+
+ // Write to json file
+ std::ofstream out;
+ out.open(json_path);
+ if (out.is_open())
+ {
+ writer->write(main_tree, &out);
+ }
+}
+
void ExecutionPlanner::get_default_execution_order_plan()
{
// Get execution order in _ordered_nodes
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
index e0833c407..af3fba33e 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.h
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -104,6 +104,8 @@ public:
_is_null_scratchpads = is_null_scratchpads;
};
+ void create_json_allocation_file(const std::string &json_path);
+
private:
// Method gets default execution order plan and saves it in _ordered_nodes vector.
// There can be different variants of execution order and this method provides main one.
diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp
index 10e185de5..318a5826b 100644
--- a/compiler/circle-inspect/driver/Driver.cpp
+++ b/compiler/circle-inspect/driver/Driver.cpp
@@ -36,7 +36,7 @@ int entry(int argc, char **argv)
.help("Dump Conv2D series weight operators in circle file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
+ arser.add_argument("circle").help("Circle file to inspect");
try
{
diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake
index 362d67cf4..183dfe227 100644
--- a/compiler/circle-inspect/requires.cmake
+++ b/compiler/circle-inspect/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
+require("foder")
require("mio-circle04")
require("safemain")
diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp
index bba5e56c3..aa8fed248 100644
--- a/compiler/circle-inspect/src/Dump.cpp
+++ b/compiler/circle-inspect/src/Dump.cpp
@@ -15,7 +15,9 @@
*/
#include "Dump.h"
-#include "Reader.h"
+
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
#include <ostream>
@@ -24,7 +26,7 @@ namespace circleinspect
void DumpOperators::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
@@ -50,7 +52,7 @@ void DumpOperators::run(std::ostream &os, const circle::Model *model)
namespace
{
-const circle::Operator *operator_match_output(circleinspect::Reader &reader, const int32_t tensor)
+const circle::Operator *operator_match_output(mio::circle::Reader &reader, const int32_t tensor)
{
auto ops = reader.operators();
@@ -58,7 +60,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con
{
const auto op = ops->Get(i);
- const std::vector<int32_t> &outputs = circleinspect::as_index_vector(op->outputs());
+ const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs());
for (auto output : outputs)
{
@@ -69,7 +71,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con
return nullptr;
}
-size_t tensor_buffer_size(circleinspect::Reader &reader, const int32_t tensor_id)
+size_t tensor_buffer_size(mio::circle::Reader &reader, const int32_t tensor_id)
{
auto tensors = reader.tensors();
@@ -93,7 +95,7 @@ namespace circleinspect
void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
@@ -110,7 +112,7 @@ void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
if (bc == circle::BuiltinOperator_CONV_2D || bc == circle::BuiltinOperator_DEPTHWISE_CONV_2D)
{
- const std::vector<int32_t> &inputs = circleinspect::as_index_vector(op->inputs());
+ const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs());
if (inputs.size() < 2)
{
throw std::runtime_error("Operator has invalid input");
@@ -147,7 +149,7 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model)
{
std::map<std::string, int32_t> op_version_map;
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
// This assert is subject to be changed later
assert(reader.num_subgraph() == 1);
@@ -181,7 +183,7 @@ namespace circleinspect
void DumpTensorDType::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp
deleted file mode 100644
index 0e2865254..000000000
--- a/compiler/circle-inspect/src/Reader.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reader.h"
-
-#include <mio_circle/Helper.h>
-
-#include <sstream>
-#include <string>
-
-namespace circleinspect
-{
-
-Reader::Reader(const circle::Model *model)
-{
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- if (buff_data != nullptr)
- {
- *buff_data = nullptr;
- }
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- if (buff_data != nullptr)
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- }
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return mio::circle::builtin_code_neutral(opcode);
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!mio::circle::is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return mio::circle::opcode_name(opcode);
-}
-
-std::string Reader::tensor_name(const circle::Tensor *tensor) const
-{
- return mio::circle::tensor_name(tensor);
-}
-
-std::string Reader::tensor_dtype(const circle::Tensor *tensor) const
-{
- return mio::circle::tensor_type(tensor);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h
deleted file mode 100644
index c38ec3990..000000000
--- a/compiler/circle-inspect/src/Reader.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __READER_H__
-#define __READER_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circleinspect
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
- std::string tensor_name(const circle::Tensor *tensor) const;
- std::string tensor_dtype(const circle::Tensor *tensor) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
-
-private:
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
-};
-
-} // namespace circleinspect
-
-#endif // __READER_H__
diff --git a/compiler/circle-interpreter/CMakeLists.txt b/compiler/circle-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..d18db3e11
--- /dev/null
+++ b/compiler/circle-interpreter/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(INTERPRETER
+ src/CircleInterpreter.cpp
+ )
+
+add_executable(circle-interpreter ${INTERPRETER})
+target_link_libraries(circle-interpreter PRIVATE arser)
+target_link_libraries(circle-interpreter PRIVATE loco)
+target_link_libraries(circle-interpreter PRIVATE luci_import)
+target_link_libraries(circle-interpreter PRIVATE luci_interpreter)
+target_link_libraries(circle-interpreter PRIVATE safemain)
+target_link_libraries(circle-interpreter PRIVATE vconone)
+
+install(TARGETS circle-interpreter DESTINATION bin)
diff --git a/compiler/circle-interpreter/requires.cmake b/compiler/circle-interpreter/requires.cmake
new file mode 100644
index 000000000..a565df65b
--- /dev/null
+++ b/compiler/circle-interpreter/requires.cmake
@@ -0,0 +1,6 @@
+require("arser")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("safemain")
+require("vconone")
diff --git a/compiler/circle-interpreter/src/CircleInterpreter.cpp b/compiler/circle-interpreter/src/CircleInterpreter.cpp
new file mode 100644
index 000000000..1d241278d
--- /dev/null
+++ b/compiler/circle-interpreter/src/CircleInterpreter.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arser/arser.h>
+#include <luci/ImporterEx.h>
+#include <luci_interpreter/Interpreter.h>
+#include <vconone/vconone.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+
+namespace
+{
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+template <typename NodeT> size_t getTensorSize(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+void print_version(void)
+{
+ std::cout << "circle-interpreter version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+/*
+ * @brief CircleInterpreter main
+ *
+ * Driver to invoke luci-interpreter
+ *
+ */
+int entry(int argc, char **argv)
+{
+ arser::Arser arser("Interpreter driver for circle models");
+
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument("model_path").help("Circle model filepath");
+ arser.add_argument("input_prefix")
+ .help("Input data filepath for circle model. "
+ "n-th input data is read from ${input_prefix}n, "
+ "for example, Add.circle.input0, Add.circle.input1");
+ arser.add_argument("output_prefix")
+ .help("Output data filepath for circle model. "
+ "Output data is written in ${output_file}n, "
+ "for example, Add.circle.output0");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ const auto filename = arser.get<std::string>("model_path");
+ const auto input_prefix = arser.get<std::string>("input_prefix");
+ const auto output_prefix = arser.get<std::string>("output_prefix");
+
+ // Load model from the file
+ luci::ImporterEx importer;
+ std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename);
+ if (module == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input.
+ // Data for n'th input is read from ${input_prefix}n
+ // (ex: Add.circle.input0, Add.circle.input1 ..)
+ const auto input_nodes = loco::input_nodes(module->graph());
+ for (int32_t i = 0; i < input_nodes.size(); i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ std::vector<char> input_data(getTensorSize(input_node));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
+ input_data.size());
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Do inference.
+ interpreter.interpret();
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+    // Output data is written in ${output_prefix}n
+ // (ex: Add.circle.output0)
+ writeDataToFile(std::string(output_prefix) + std::to_string(i), output_data.data(),
+ output_data.size());
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-operator-test/CMakeLists.txt b/compiler/circle-operator-test/CMakeLists.txt
new file mode 100644
index 000000000..2ebd533b9
--- /dev/null
+++ b/compiler/circle-operator-test/CMakeLists.txt
@@ -0,0 +1,18 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR)
+get_target_property(CIRCLE_OPERATOR_PATH circle-operator BINARY_DIR)
+set(CIRCLE_OPERATOR_PATH "${CIRCLE_OPERATOR_PATH}/circle-operator")
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+
+GTest_AddTest(circle-operator-test ${TESTS})
+
+set_tests_properties(circle-operator-test
+ PROPERTIES
+ ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_OPERATOR_PATH=${CIRCLE_OPERATOR_PATH}"
+ )
diff --git a/compiler/circle-operator-test/README.md b/compiler/circle-operator-test/README.md
new file mode 100644
index 000000000..d07c64d2e
--- /dev/null
+++ b/compiler/circle-operator-test/README.md
@@ -0,0 +1,7 @@
+# circle-operator-test
+
+_circle-operator-test_ verifies that the circle-operator tool works as expected.
+
+Current tests include
+- input arguments test is working as expected
+- output of this tool is as expected
diff --git a/compiler/circle-operator-test/requires.cmake b/compiler/circle-operator-test/requires.cmake
new file mode 100644
index 000000000..8ad3b8a64
--- /dev/null
+++ b/compiler/circle-operator-test/requires.cmake
@@ -0,0 +1,2 @@
+require("circle-operator")
+require("common-artifacts")
diff --git a/compiler/circle-operator-test/src/circle-operator.test.cpp b/compiler/circle-operator-test/src/circle-operator.test.cpp
new file mode 100644
index 000000000..29c6f3792
--- /dev/null
+++ b/compiler/circle-operator-test/src/circle-operator.test.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+
+class cirlce_operator_test : public ::testing::Test
+{
+protected:
+ bool initialize(void);
+ bool run(const std::string &command);
+
+protected:
+ bool load(const std::string &file);
+
+protected:
+ std::string _artifacts_path;
+ std::string _circle_operator_path;
+ std::string _result;
+};
+
+bool cirlce_operator_test::initialize(void)
+{
+ char *path = std::getenv("ARTIFACTS_PATH");
+ if (path == nullptr)
+ {
+ std::cerr << "ARTIFACTS_PATH not found" << std::endl;
+ return false;
+ }
+ _artifacts_path = path;
+
+ path = std::getenv("CIRCLE_OPERATOR_PATH");
+ if (path == nullptr)
+ {
+    std::cerr << "CIRCLE_OPERATOR_PATH not found" << std::endl;
+ return false;
+ }
+ _circle_operator_path = path;
+
+ return true;
+}
+
+bool cirlce_operator_test::run(const std::string &command)
+{
+ std::vector<char> buffer(260);
+ std::string result = "";
+ std::string cmd_err = command + " 2>&1";
+ FILE *pipe = popen(cmd_err.c_str(), "r");
+ if (!pipe)
+ {
+ return false;
+ }
+ try
+ {
+ while (fgets(&buffer[0], buffer.size(), pipe) != NULL)
+ {
+ result += &buffer[0];
+ }
+ }
+ catch (...)
+ {
+ pclose(pipe);
+ return false;
+ }
+ pclose(pipe);
+ _result = result;
+
+ std::cout << _result << std::endl;
+
+ return true;
+}
+
+bool cirlce_operator_test::load(const std::string &file)
+{
+ std::ifstream tmp(file.c_str());
+ if (tmp.fail())
+ return false;
+
+ std::stringstream buffer;
+ buffer << tmp.rdbuf();
+ _result = buffer.str();
+ return true;
+}
+
+TEST_F(cirlce_operator_test, valid_names)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ofm");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, valid_codes)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, invalid_option_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --opname " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Invalid argument");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, check_code_name)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ofm");
+ ASSERT_NE(std::string::npos, pos);
+ const auto pos2 = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos2);
+}
+
+TEST_F(cirlce_operator_test, nonexist_file_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/non_exist_file.foo";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ERROR");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, invalid_file_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.recipe";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ERROR");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, output_file)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string fileName("/tmp/a.txt");
+ std::remove(fileName.c_str());
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code --output_path " + fileName + " " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+ if (!load(fileName))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos);
+}
diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt
new file mode 100644
index 000000000..6817a8618
--- /dev/null
+++ b/compiler/circle-operator/CMakeLists.txt
@@ -0,0 +1,17 @@
+if(NOT TARGET mio_circle04)
+ return()
+endif(NOT TARGET mio_circle04)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle-operator ${DRIVER} ${SOURCES})
+target_include_directories(circle-operator PRIVATE src)
+target_link_libraries(circle-operator arser)
+target_link_libraries(circle-operator foder)
+target_link_libraries(circle-operator mio_circle04)
+target_link_libraries(circle-operator mio_circle04_helper)
+target_link_libraries(circle-operator safemain)
+
+install(TARGETS circle-operator DESTINATION bin)
diff --git a/compiler/circle-operator/README.md b/compiler/circle-operator/README.md
new file mode 100644
index 000000000..86a923f05
--- /dev/null
+++ b/compiler/circle-operator/README.md
@@ -0,0 +1,70 @@
+# circle-operator
+
+_circle-operator_ allows users to retrieve operators information from a Circle model file
+
+NOTE: this tool is primarily for ONE-vscode, where PartEditor needs the names and codes
+of the operators.
+
+## Information with operators
+
+Operators with `--name`
+- show operator names one line at a time in execution order
+
+Example
+```
+$ circle-operator --name model.circle
+```
+
+Result
+```
+conv1_pad/Pad
+conv1_conv/BiasAdd
+pool1_pad/Pad
+```
+
+Operators codes with `--code`
+- show operator codes one line at a time in execution order
+
+Example
+```
+$ circle-operator --code model.circle
+```
+
+Result
+```
+PAD
+CONV_2D
+PAD
+```
+
+Operators with both `--code` and `--name`
+- show both the operator code and name, separated with `,`, one line at a time in execution order
+
+Example
+```
+$ circle-operator --code --name model.circle
+```
+
+Result
+```
+PAD,conv1_pad/Pad
+CONV_2D,conv1_conv/BiasAdd
+PAD,pool1_pad/Pad
+```
+
+## Save to file
+
+Use `--output_path` to save results to a file.
+
+Example
+```
+$ circle-operator --name --output_path /tmp/result model.circle
+```
+
+Result
+```
+$ cat /tmp/result
+conv1_pad/Pad
+conv1_conv/BiasAdd
+pool1_pad/Pad
+```
diff --git a/compiler/circle-operator/driver/Driver.cpp b/compiler/circle-operator/driver/Driver.cpp
new file mode 100644
index 000000000..f5fd8073c
--- /dev/null
+++ b/compiler/circle-operator/driver/Driver.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+
+#include <arser/arser.h>
+#include <foder/FileLoader.h>
+#include <fstream>
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <vector>
+#include <string>
+
+#include <signal.h>
+
+void handle_segfault(int signal, siginfo_t *si, void *arg)
+{
+ std::cerr << "ERROR: Failed to load file" << std::endl;
+ exit(255);
+}
+
+int entry(int argc, char **argv)
+{
+ // TODO add option to dump for all sub-graphs
+ arser::Arser arser{
+ "circle-operator allows users to retrieve operator information from a Circle model file"};
+ arser.add_argument("--name").nargs(0).help("Dump operators name in circle file");
+ arser.add_argument("--code").nargs(0).help("Dump operators code in circle file");
+ arser.add_argument("--output_path").help("Save output to file (default output is console)");
+ arser.add_argument("circle").help("Circle file to dump");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cerr << arser;
+ return 255;
+ }
+
+ cirops::DumpOption option;
+ option.names = arser["--name"];
+ option.codes = arser["--code"];
+
+ std::ofstream oFstream;
+ std::ostream *oStream = &std::cout;
+ if (arser["--output_path"])
+ {
+ auto output_path = arser.get<std::string>("--output_path");
+ oFstream.open(output_path, std::ofstream::out | std::ofstream::trunc);
+ if (oFstream.fail())
+ {
+ std::cerr << "ERROR: Failed to create output to file " << output_path << std::endl;
+ return 255;
+ }
+ oStream = &oFstream;
+ }
+
+ // hook segment fault
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(struct sigaction));
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = handle_segfault;
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ std::string modelFile = arser.get<std::string>("circle");
+ // Load Circle model from a circle file
+ try
+ {
+ foder::FileLoader fileLoader{modelFile};
+ std::vector<char> modelData = fileLoader.load();
+ const circle::Model *circleModel = circle::GetModel(modelData.data());
+ if (circleModel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << modelFile << "'" << std::endl;
+ return 255;
+ }
+ cirops::DumpOperators dump;
+ dump.run(*oStream, circleModel, option);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << "ERROR: " << err.what() << std::endl;
+ return 255;
+ }
+
+ if (oFstream.is_open())
+ {
+ oFstream.close();
+ }
+
+ return 0;
+}
diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake
new file mode 100644
index 000000000..183dfe227
--- /dev/null
+++ b/compiler/circle-operator/requires.cmake
@@ -0,0 +1,4 @@
+require("arser")
+require("foder")
+require("mio-circle04")
+require("safemain")
diff --git a/compiler/circle-operator/src/Dump.cpp b/compiler/circle-operator/src/Dump.cpp
new file mode 100644
index 000000000..36bfe8632
--- /dev/null
+++ b/compiler/circle-operator/src/Dump.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
+
+#include <ostream>
+
+namespace
+{
+
+void dump_ops(std::ostream &os, mio::circle::Reader &reader, const cirops::DumpOption &option)
+{
+ auto ops = reader.operators();
+ for (uint32_t i = 0; i < ops->Length(); ++i)
+ {
+ const auto op = ops->Get(i);
+ const auto op_name = reader.opcode_name(op);
+
+ if (option.all_graphs)
+ {
+ // NOTE all_graphs is false for now
+ // TODO check using '$' as split key
+ os << i << "$";
+ }
+
+ if (option.codes)
+ {
+ const auto op_name = reader.opcode_name(op);
+ os << op_name;
+ }
+ if (option.names)
+ {
+ // TODO multiple outputs?
+ const auto tensors = reader.tensors();
+ const auto output_tensors = reader.outputs(op);
+ const auto output = output_tensors.at(0);
+ const auto tensor = tensors->Get(output);
+ const std::string name = mio::circle::tensor_name(tensor);
+ if (option.codes)
+ {
+ os << ",";
+ }
+ os << name;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace
+
+namespace cirops
+{
+
+void DumpOperators::run(std::ostream &os, const circle::Model *model, const DumpOption &option)
+{
+ mio::circle::Reader reader(model);
+
+ const uint32_t subgraph_size = reader.num_subgraph();
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ dump_ops(os, reader, option);
+
+ if (!option.all_graphs)
+ break;
+ }
+}
+
+} // namespace cirops
diff --git a/compiler/circle-operator/src/Dump.h b/compiler/circle-operator/src/Dump.h
new file mode 100644
index 000000000..aa1d1be49
--- /dev/null
+++ b/compiler/circle-operator/src/Dump.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUMP_H__
+#define __DUMP_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <ostream>
+
+namespace cirops
+{
+
+struct DumpOption
+{
+ bool names = false;
+ bool codes = false;
+ bool all_graphs = false;
+};
+
+class DumpOperators
+{
+public:
+ DumpOperators() = default;
+
+public:
+ void run(std::ostream &os, const circle::Model *model, const DumpOption &option);
+};
+
+} // namespace cirops
+
+#endif // __DUMP_H__
diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp
index a1ace4f58..4b39a6ddb 100644
--- a/compiler/circle-opselector/driver/Driver.cpp
+++ b/compiler/circle-opselector/driver/Driver.cpp
@@ -159,26 +159,16 @@ int entry(int argc, char **argv)
arser::Arser arser("circle-opselector provides selecting operations in circle model");
- arser.add_argument("--version")
- .nargs(0)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ arser::Helper::add_version(arser, print_version);
// TODO Add new options!
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
// select option
- arser.add_argument("--by_id")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Input operation id to select nodes.");
- arser.add_argument("--by_name")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Input operation name to select nodes.");
+ arser.add_argument("--by_id").help("Input operation id to select nodes.");
+ arser.add_argument("--by_name").help("Input operation name to select nodes.");
try
{
diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt
index 0657607d2..ffe1b8909 100644
--- a/compiler/circle-part-value-test/CMakeLists.txt
+++ b/compiler/circle-part-value-test/CMakeLists.txt
@@ -82,7 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
# Run partitioner
add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
- COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file"
+ "${PARTITION_NAME}.circle" "--work_path" "${PARTITIONER_OUTPUT_PATH}"
DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
)
diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt
index e29a66b41..7b26b3ba7 100644
--- a/compiler/circle-partitioner-test/CMakeLists.txt
+++ b/compiler/circle-partitioner-test/CMakeLists.txt
@@ -57,7 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
# Run partitioner
set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json")
add_custom_command(OUTPUT ${PART_CONN_JSON}
- COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}"
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file"
+ "${PART_NAME}.circle" "--work_path" "${PART_OUT_PATH}"
DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
)
diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt
index 9b8f5afae..abc5d93fb 100644
--- a/compiler/circle-partitioner/CMakeLists.txt
+++ b/compiler/circle-partitioner/CMakeLists.txt
@@ -1,7 +1,6 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circle-partitioner "${SOURCES}")
-target_link_libraries(circle-partitioner foder)
target_link_libraries(circle-partitioner crew)
target_link_libraries(circle-partitioner safemain)
target_link_libraries(circle-partitioner luci_lang)
@@ -17,22 +16,3 @@ target_link_libraries(circle-partitioner vconone)
target_link_libraries(circle-partitioner nncc_common)
install(TARGETS circle-partitioner DESTINATION bin)
-
-# TODO remove circle_partitioner
-add_executable(circle_partitioner "${SOURCES}")
-target_link_libraries(circle_partitioner foder)
-target_link_libraries(circle_partitioner crew)
-target_link_libraries(circle_partitioner safemain)
-target_link_libraries(circle_partitioner luci_lang)
-target_link_libraries(circle_partitioner luci_log)
-target_link_libraries(circle_partitioner luci_import)
-target_link_libraries(circle_partitioner luci_service)
-target_link_libraries(circle_partitioner luci_pass)
-target_link_libraries(circle_partitioner luci_export)
-target_link_libraries(circle_partitioner luci_partition)
-target_link_libraries(circle_partitioner arser)
-target_link_libraries(circle_partitioner pepper_csv2vec)
-target_link_libraries(circle_partitioner vconone)
-target_link_libraries(circle_partitioner nncc_common)
-
-install(TARGETS circle_partitioner DESTINATION bin)
diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md
index 2e0a98638..760cf28d1 100644
--- a/compiler/circle-partitioner/README.md
+++ b/compiler/circle-partitioner/README.md
@@ -4,10 +4,10 @@ _circle-partitioner_ provides model partitioning of circle model to two or more
## How circle-partitioner work
-_circle-partitioner_ requires 3 positional arguments
-- first: `partition` file
-- second: `input` circle model file
-- third: `work` folder
+_circle-partitioner_ requires 3 arguments for input files
+- `--part_file`: `partition` file, use extension `.part`
+- `--input_file`: `input` circle model file
+- `--work_path`: `work` path where input files reside. This is optional and defaults to CWD if omitted
And options to override `partition` file as a helper to try out without editing `partition` file.
- `--backends`: override `backends` of `[partition]` section
@@ -20,7 +20,7 @@ are read from `work` folder.
Outputs are (1) one or more partitioned circle models and (2) connection file that gives how
the partitioned models should be connected to act like the source `input` model.
-Why does input files be placed in `work` folder too?
+Why do input files need to be placed in `work` path too?
- this is still work in progress condition
- use cases are still ambiguous
- original `input` model file can be used by the backend, so `.conn` file links it as `source`
@@ -94,7 +94,8 @@ Net_InstanceNorm_003/
Command example
```
-./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003
+./circle-partitioner --part_file Net_InstanceNorm_003.part \
+--input_file Net_InstanceNorm_003.circle --work_path Net_InstanceNorm_003
```
Result of _circle-partitioner_
@@ -171,11 +172,11 @@ Consider partitioning with backends of OneRT
Let's try with this command:
```
-circle_partitioner \
- --partition Net_InstanceNorm_003.part \
- --backends cpu,acl_cl \
- --default cpu \
- Net_InstanceNorm_003.circle Net_InstanceNorm_003
+circle-partitioner \
+ --backends cpu,acl_cl --default cpu \
+ --part_file Net_InstanceNorm_003.part \
+ --input_file Net_InstanceNorm_003.circle \
+ --work_path Net_InstanceNorm_003
```
where `Net_InstanceNorm_003.part` is like this for initial design
diff --git a/compiler/circle-partitioner/requires.cmake b/compiler/circle-partitioner/requires.cmake
index 690d9531c..82d9c2b0f 100644
--- a/compiler/circle-partitioner/requires.cmake
+++ b/compiler/circle-partitioner/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("crew")
require("pepper-csv2vec")
require("safemain")
diff --git a/compiler/circle-partitioner/src/CirclePartitioner.cpp b/compiler/circle-partitioner/src/CirclePartitioner.cpp
index 0151e92d3..5cecb9ae0 100644
--- a/compiler/circle-partitioner/src/CirclePartitioner.cpp
+++ b/compiler/circle-partitioner/src/CirclePartitioner.cpp
@@ -18,9 +18,7 @@
#include "PartitionExport.h"
#include "HelperPath.h"
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
@@ -41,9 +39,9 @@ namespace
const char *opt_bks = "--backends";
const char *opt_def = "--default";
-const char *opt_part = "partition";
-const char *opt_input = "input";
-const char *opt_work = "work";
+const char *opt_part_file = "--part_file";
+const char *opt_input_file = "--input_file";
+const char *opt_work_path = "--work_path";
void print_version(void)
{
@@ -53,63 +51,25 @@ void print_version(void)
void build_arser(arser::Arser &arser)
{
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument(opt_bks)
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Backends in CSV to use for partitioning");
-
- arser.add_argument(opt_def)
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Default backend to assign");
-
- arser.add_argument(opt_part)
- .nargs(1)
- .type(arser::DataType::STR)
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument(opt_bks).help("Backends in CSV to use for partitioning");
+
+ arser.add_argument(opt_def).help("Default backend to assign");
+
+ arser.add_argument(opt_part_file)
+ .required(true)
.help("Partition file which provides backend to assign");
- arser.add_argument(opt_input)
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Input circle model filename");
- arser.add_argument(opt_work)
- .nargs(1)
- .type(arser::DataType::STR)
+ arser.add_argument(opt_input_file).required(true).help("Input circle model filename");
+ arser.add_argument(opt_work_path)
.help("Work folder of partition, input files exist and output files are produced");
}
std::unique_ptr<luci::Module> load_model(const std::string &input_path)
{
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
-
- // Verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
- {
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return nullptr;
- }
-
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
- return nullptr;
- }
-
// Import from input Circle file
- luci::Importer importer;
- return importer.importModule(circle_model);
+ luci::ImporterEx importerex;
+ return importerex.importVerifyModule(input_path);
}
} // namespace
@@ -133,9 +93,14 @@ int entry(int argc, char **argv)
return EXIT_FAILURE;
}
- std::string partition_file = arser.get<std::string>(opt_part);
- std::string input_file = arser.get<std::string>(opt_input);
- std::string work_folder = arser.get<std::string>(opt_work);
+ std::string partition_file = arser.get<std::string>(opt_part_file);
+ std::string input_file = arser.get<std::string>(opt_input_file);
+ std::string work_folder = ".";
+
+ if (arser[opt_work_path])
+ {
+ work_folder = arser.get<std::string>(opt_work_path);
+ }
std::string partition_path = work_folder + "/" + partition_file;
std::string input_path = work_folder + "/" + input_file;
diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
index 5ec8b6ee5..a3a2902d9 100644
--- a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
+++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
@@ -18,7 +18,7 @@ unset(TEST_NAMES)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
set(options USE_QCONFIG)
-set(oneValueArgs DTYPE GRANULARITY)
+set(oneValueArgs DTYPE GRANULARITY INPUT_DTYPE OUTPUT_DTYPE)
set(multiValueArgs "")
macro(Add RECIPE)
@@ -29,6 +29,16 @@ macro(Add RECIPE)
set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
endif()
+ set(INPUT_DTYPE_OPT "")
+ if(ARG_INPUT_DTYPE)
+ set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}")
+ endif()
+
+ set(OUTPUT_DTYPE_OPT "")
+ if(ARG_OUTPUT_DTYPE)
+ set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}")
+ endif()
+
set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle")
set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
@@ -38,7 +48,10 @@ macro(Add RECIPE)
add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH}
COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
- COMMAND $<TARGET_FILE:circle-quantizer> --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer>
+ --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY}
+ ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT}
DEPENDS
circle-quantizer
record-minmax
diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst
index 188103016..58f89c767 100644
--- a/compiler/circle-quantizer-dredd-recipe-test/test.lst
+++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst
@@ -6,10 +6,75 @@
## TFLITE RECIPE
+# MPQ Test (default: u8, target: s16)
+Add(Quant_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_AveragePool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_BatchMatMul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Concatenation_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_003 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_DepthwiseConv2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_FullyConnected_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_LeakyRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Logistic_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_MaxPool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mean_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Neg_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Pad_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_PRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU6_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Reshape_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeBilinear_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeNearestNeighbor_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Slice_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Softmax_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Tanh_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Transpose_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_TransposeConv_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+# MPQ Test (default: s16, target: u8)
+Add(Quant_Add_002 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_AveragePool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_BatchMatMul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Concatenation_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_004 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_DepthwiseConv2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_FullyConnected_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_LeakyRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Logistic_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_MaxPool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mean_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Neg_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Pad_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_PRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU6_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Reshape_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeBilinear_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeNearestNeighbor_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Slice_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Softmax_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Tanh_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Transpose_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_TransposeConv_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+
Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_000 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32)
+Add(Quant_Conv_001 DTYPE uint8 GRANULARITY channel OUTPUT_DTYPE float32)
+Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32)
AddFakeQuant(Quant_Add_000)
+
+## CIRCLE RECIPE
+
+# MPQ Test (default: u8, target: s16)
+Add(Quant_InstanceNorm_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+# MPQ Test (default: s16, target: u8)
+Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index 14e00972b..16e41a327 100644
--- a/compiler/circle-quantizer/CMakeLists.txt
+++ b/compiler/circle-quantizer/CMakeLists.txt
@@ -10,7 +10,6 @@ add_executable(circle-quantizer "${SOURCES}")
target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS})
target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB})
-target_link_libraries(circle-quantizer foder)
target_link_libraries(circle-quantizer safemain)
target_link_libraries(circle-quantizer oops)
target_link_libraries(circle-quantizer loco)
diff --git a/compiler/circle-quantizer/requires.cmake b/compiler/circle-quantizer/requires.cmake
index c21e28e8d..4fcee1873 100644
--- a/compiler/circle-quantizer/requires.cmake
+++ b/compiler/circle-quantizer/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("loco")
require("locop")
require("safemain")
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index e0c85cb6e..f1e31ed8d 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -14,9 +14,7 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/CircleQuantizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
@@ -59,13 +57,31 @@ std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename
std::vector<std::shared_ptr<LayerParam>> p;
for (auto layer : layers)
{
- auto l = std::make_shared<LayerParam>();
+ if (layer.isMember("name"))
{
- l->name = layer["name"].asString();
- l->dtype = layer["dtype"].asString();
- l->granularity = layer["granularity"].asString();
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = layer["name"].asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
+
+ // Multiple names with the same dtype & granularity
+ if (layer.isMember("names"))
+ {
+ for (auto name : layer["names"])
+ {
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = name.asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
}
- p.emplace_back(l);
}
return p;
@@ -109,23 +125,12 @@ int entry(int argc, char **argv)
arser::Arser arser("circle-quantizer provides circle model quantization");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
arser.add_argument(qdqw)
.nargs(3)
.type(arser::DataType::STR_VEC)
- .required(false)
.help("Quantize-dequantize weight values required action before quantization. "
"Three arguments required: input_model_dtype(float32) "
"output_model_dtype(uint8) granularity(layer, channel)");
@@ -133,28 +138,24 @@ int entry(int argc, char **argv)
arser.add_argument(qwmm)
.nargs(3)
.type(arser::DataType::STR_VEC)
- .required(false)
.help("Quantize with min/max values. "
"Three arguments required: input_model_dtype(float32) "
"output_model_dtype(uint8) granularity(layer, channel)");
arser.add_argument(tf_maxpool)
.nargs(0)
- .required(false)
.default_value(false)
.help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
"degrade accuracy of some models");
arser.add_argument(fake_quant)
.nargs(0)
- .required(false)
.help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
"generate an fp32 model.");
arser.add_argument(rq)
.nargs(2)
.type(arser::DataType::STR_VEC)
- .required(false)
.help("Requantize a quantized model. "
"Two arguments required: input_model_dtype(int8) "
"output_model_dtype(uint8)");
@@ -162,7 +163,6 @@ int entry(int argc, char **argv)
arser.add_argument(fq)
.nargs(3)
.type(arser::DataType::STR_VEC)
- .required(false)
.accumulated(true)
.help("Write quantization parameters to the specified tensor. "
"Three arguments required: tensor_name(string), "
@@ -171,32 +171,21 @@ int entry(int argc, char **argv)
arser.add_argument(cq)
.nargs(2)
.type(arser::DataType::STR_VEC)
- .required(false)
.accumulated(true)
.help("Copy quantization parameter from a tensor to another tensor."
"Two arguments required: source_tensor_name(string), "
"destination_tensor_name(string)");
arser.add_argument("--input_type")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Input type of quantized model (uint8 or int16)");
+ .help("Input type of quantized model (uint8, int16, or float32)");
arser.add_argument("--output_type")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Output type of quantized model (uint8 or int16)");
+ .help("Output type of quantized model (uint8, int16, or float32)");
- arser.add_argument(cfg)
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Path to the quantization configuration file");
+ arser.add_argument(cfg).help("Path to the quantization configuration file");
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
"This will turn on profiling data generation.");
@@ -384,27 +373,10 @@ int entry(int argc, char **argv)
settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
// Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
-
- // Verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
- {
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(input_path);
+ if (module.get() == nullptr)
return EXIT_FAILURE;
- }
-
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
for (size_t idx = 0; idx < module->size(); ++idx)
{
diff --git a/compiler/circle-tensordump/driver/Driver.cpp b/compiler/circle-tensordump/driver/Driver.cpp
index 70f3c8d84..c32dc3f5a 100644
--- a/compiler/circle-tensordump/driver/Driver.cpp
+++ b/compiler/circle-tensordump/driver/Driver.cpp
@@ -31,11 +31,9 @@ int entry(int argc, char **argv)
arser::Arser arser{
"circle-tensordump allows users to retrieve tensor information from a Circle model file"};
- arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump");
+ arser.add_argument("circle").help("Circle file path to dump");
arser.add_argument("--tensors").nargs(0).help("Dump to console");
arser.add_argument("--tensors_to_hdf5")
- .nargs(1)
- .type(arser::DataType::STR)
.help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
try
diff --git a/compiler/circle-tensordump/src/Dump.cpp b/compiler/circle-tensordump/src/Dump.cpp
index e477a7417..49afa73df 100644
--- a/compiler/circle-tensordump/src/Dump.cpp
+++ b/compiler/circle-tensordump/src/Dump.cpp
@@ -15,7 +15,8 @@
*/
#include "Dump.h"
-#include "Reader.h"
+
+#include <mio_circle/Reader.h>
#include <H5Cpp.h>
@@ -102,7 +103,7 @@ namespace circletensordump
void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &)
{
- circletensordump::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
auto buffers = reader.buffers();
@@ -296,7 +297,7 @@ void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model,
const std::string &output_path)
{
// loads a circle model
- circletensordump::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
// create a hdf5 file
diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp
deleted file mode 100644
index 47b876054..000000000
--- a/compiler/circle-tensordump/src/Reader.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reader.h"
-
-#include <mio_circle/Helper.h>
-
-#include <sstream>
-#include <string>
-
-namespace circletensordump
-{
-
-Reader::Reader(const circle::Model *model)
-{
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- if (buff_data != nullptr)
- {
- *buff_data = nullptr;
- }
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- if (buff_data != nullptr)
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- }
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return mio::circle::builtin_code_neutral(opcode);
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!mio::circle::is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return mio::circle::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circletensordump
diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h
deleted file mode 100644
index c868bc277..000000000
--- a/compiler/circle-tensordump/src/Reader.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_TENSORDUMP_READER_H__
-#define __CIRCLE_TENSORDUMP_READER_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circletensordump
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
-
-private:
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
-};
-
-} // namespace circletensordump
-
-#endif // __CIRCLE_TENSORDUMP_READER_H__
diff --git a/compiler/circle-verify/src/Driver.cpp b/compiler/circle-verify/src/Driver.cpp
index 7a44c65b9..c3a414701 100644
--- a/compiler/circle-verify/src/Driver.cpp
+++ b/compiler/circle-verify/src/Driver.cpp
@@ -25,7 +25,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to verify");
+ arser.add_argument("circle").help("Circle file path to verify");
try
{
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index f41aac303..a6f2786d2 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -31,6 +31,8 @@ Add(Net_TConv_Add_002 PASS fuse_add_with_tconv)
Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_003 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_003 PASS fuse_instnorm)
Add(Net_InstanceNorm_004 PASS fuse_instnorm)
@@ -46,6 +48,7 @@ Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape)
Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
+Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul)
## CIRCLE RECIPE
diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt
index cd79967b7..dbe485b9f 100644
--- a/compiler/circle2circle/CMakeLists.txt
+++ b/compiler/circle2circle/CMakeLists.txt
@@ -4,7 +4,6 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_executable(circle2circle "${SOURCES}")
target_include_directories(circle2circle PRIVATE src)
-target_link_libraries(circle2circle foder)
target_link_libraries(circle2circle nncc_common)
target_link_libraries(circle2circle safemain)
target_link_libraries(circle2circle oops)
@@ -29,7 +28,6 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(circle2circle_test ${TESTS} ${SOURCES})
target_include_directories(circle2circle_test PRIVATE src)
-target_link_libraries(circle2circle_test foder)
target_link_libraries(circle2circle_test nncc_common)
target_link_libraries(circle2circle_test oops)
target_link_libraries(circle2circle_test hermes)
diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake
index b6c61198f..4e5ed0dd1 100644
--- a/compiler/circle2circle/requires.cmake
+++ b/compiler/circle2circle/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("loco")
require("locop")
require("logo-core")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index ae677a321..f5cf0d782 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -14,9 +14,7 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/CircleOptimizer.h>
#include <luci/Service/ChangeOutputs.h>
#include <luci/Service/Validate.h>
@@ -54,6 +52,11 @@ void csv_tokenize(const std::string &data, std::vector<std::string> &result)
result.push_back(token);
}
+void add_switch(arser::Arser &arser, const char *opt, const char *desc)
+{
+ arser.add_argument(opt).nargs(0).default_value(false).help(desc);
+}
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
@@ -64,368 +67,125 @@ int entry(int argc, char **argv)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
-
- arser.add_argument("--O1").nargs(0).required(false).default_value(false).help(
- "Enable O1 optimize options");
-
- arser.add_argument("--fold_add_v2")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold AddV2 operators with constant inputs");
-
- arser.add_argument("--fold_cast")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold Cast operators with constant input");
-
- arser.add_argument("--fold_dequantize")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold dequantize op");
-
- arser.add_argument("--fold_dwconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold Depthwise Convolution operator with constant inputs");
-
- arser.add_argument("--fold_gather")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold Gather operator");
-
- arser.add_argument("--fold_sparse_to_dense")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold SparseToDense operator");
-
- arser.add_argument("--forward_reshape_to_unaryop")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will move Reshape after UnaryOp for centain condition");
-
- arser.add_argument("--fuse_activation_function")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Activation function to a preceding operator");
-
- arser.add_argument("--fuse_add_with_fully_connected")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Add operator to FullyConnected operator");
-
- arser.add_argument("--fuse_add_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Add operator to Transposed Convolution operator");
-
- arser.add_argument("--fuse_batchnorm_with_conv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Convolution operator");
-
- arser.add_argument("--fuse_batchnorm_with_dwconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Depthwise Convolution operator");
-
- arser.add_argument("--fuse_batchnorm_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Transposed Convolution operator");
-
- arser.add_argument("--fuse_bcq")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators and apply Binary Coded Quantization");
-
- arser.add_argument("--fuse_instnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators to InstanceNorm operator");
-
- arser.add_argument("--fuse_mean_with_mean")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse two Mean operations when they follow one by one."
- "This will fold them into one operation and merge reduction indices.");
-
- arser.add_argument("--fuse_transpose_with_mean")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Mean operation with a preceding Transpose under certain conditions.");
-
- arser.add_argument("--make_batchnorm_gamma_positive")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note "
- "that this pass can change the execution result of the model. So, use it only when the "
- "impact is known to be acceptable.");
-
- arser.add_argument("--fuse_preactivation_batchnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
-
- arser.add_argument("--remove_fakequant")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove FakeQuant operators");
-
- arser.add_argument("--remove_quantdequant")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove Quantize-Dequantize sequence");
-
- arser.add_argument("--remove_redundant_quantize")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove redundant Quantize operators");
-
- arser.add_argument("--remove_redundant_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse or remove subsequent Reshape operators");
-
- arser.add_argument("--remove_redundant_transpose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse or remove subsequent Transpose operators");
-
- arser.add_argument("--remove_unnecessary_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary reshape operators");
-
- arser.add_argument("--remove_unnecessary_slice")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary slice operators");
-
- arser.add_argument("--remove_unnecessary_strided_slice")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary strided slice operators");
-
- arser.add_argument("--remove_unnecessary_split")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary split operators");
-
- arser.add_argument("--replace_cw_mul_add_with_depthwise_conv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
-
- arser.add_argument("--replace_sub_with_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will replace sub with add operator");
-
- arser.add_argument("--resolve_customop_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Add) to Add operator");
-
- arser.add_argument("--resolve_customop_batchmatmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
-
- arser.add_argument("--resolve_customop_matmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Matmul) to Matmul operator");
-
- arser.add_argument("--resolve_customop_max_pool_with_argmax")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators");
-
- arser.add_argument("--shuffle_weight_to_16x1float32")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
- "it only converts weights whose row is a multiple of 16");
-
- arser.add_argument("--substitute_pack_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert single input Pack to Reshape");
-
- arser.add_argument("--substitute_padv2_to_pad")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition PadV2 to Pad");
-
- arser.add_argument("--substitute_splitv_to_split")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition SplitV to Split operator");
-
- arser.add_argument("--substitute_squeeze_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition Squeeze to Reshape");
-
- arser.add_argument("--substitute_strided_slice_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition Strided_Slice to Reshape");
-
- arser.add_argument("--substitute_transpose_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert single input Transpose to Reshape");
-
- arser.add_argument("--expand_broadcast_const")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will expand broadcastable constant inputs");
-
- arser.add_argument("--convert_nchw_to_nhwc")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Experimental: This will convert NCHW operators to NHWC under the assumption that "
- "input model is NCHW.");
-
- arser.add_argument("--nchw_to_nhwc_input_shape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Convert the input shape of the model (argument for --convert_nchw_to_nhwc).");
-
- arser.add_argument("--nchw_to_nhwc_output_shape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Convert the output shape of the model (argument for --convert_nchw_to_nhwc).");
-
- arser.add_argument("--transform_min_max_to_relu6")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Transform Minimum(6)-Maximum(0) pattern to Relu6 operator");
-
- arser.add_argument("--transform_min_relu_to_relu6")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Transform Minimum(6)-Relu pattern to Relu6 operator");
-
- arser.add_argument("--mute_warnings")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off warning messages");
-
- arser.add_argument("--disable_validation")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off operator validations. May help input model investigation.");
-
- arser.add_argument("--generate_profile_data")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn on profiling data generation.");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ add_switch(arser, "--fold_add_v2", "This will fold AddV2 operators with constant inputs");
+ add_switch(arser, "--fold_cast", "This will fold Cast operators with constant input");
+ add_switch(arser, "--fold_densify",
+ "This will fold Densify operators with sparse constant input");
+ add_switch(arser, "--fold_dequantize", "This will fold dequantize op");
+ add_switch(arser, "--fold_dwconv",
+ "This will fold Depthwise Convolution operator with constant inputs");
+ add_switch(arser, "--fold_gather", "This will fold Gather operator");
+ add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
+ add_switch(arser, "--forward_reshape_to_unaryop",
+ "This will move Reshape after UnaryOp for centain condition");
+ add_switch(arser, "--fuse_activation_function",
+ "This will fuse Activation function to a preceding operator");
+ add_switch(arser, "--fuse_add_with_fully_connected",
+ "This will fuse Add operator to FullyConnected operator");
+ add_switch(arser, "--fuse_add_with_tconv",
+ "This will fuse Add operator to Transposed Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_conv",
+ "This will fuse BatchNorm operators to Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_dwconv",
+ "This will fuse BatchNorm operators to Depthwise Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_tconv",
+ "This will fuse BatchNorm operators to Transposed Convolution operator");
+ add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization");
+ add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator");
+ add_switch(arser, "--fuse_mean_with_mean",
+ "This will fuse two Mean operations when they follow one by one. This will fold them "
+ "into one operation and merge reduction indices.");
+ add_switch(arser, "--fuse_transpose_with_mean",
+ "This will fuse Mean operation with a preceding Transpose under certain conditions.");
+ add_switch(arser, "--make_batchnorm_gamma_positive",
+ "This will make negative gamma of BatchNorm into a small positive value (1e-10). "
+ "Note that this pass can change the execution result of the model. So, use it only "
+ "when the impact is known to be acceptable.");
+ add_switch(arser, "--fuse_preactivation_batchnorm",
+ "This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
+ add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence");
+ add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators");
+ add_switch(arser, "--remove_redundant_reshape",
+ "This will fuse or remove subsequent Reshape operators");
+ add_switch(arser, "--remove_redundant_transpose",
+ "This will fuse or remove subsequent Transpose operators");
+ add_switch(arser, "--remove_unnecessary_reshape",
+ "This will remove unnecessary reshape operators");
+ add_switch(arser, "--remove_unnecessary_slice", "This will remove unnecessary slice operators");
+ add_switch(arser, "--remove_unnecessary_strided_slice",
+ "This will remove unnecessary strided slice operators");
+ add_switch(arser, "--remove_unnecessary_split", "This will remove unnecessary split operators");
+ add_switch(arser, "--replace_cw_mul_add_with_depthwise_conv",
+ "This will replace channel-wise mul/add with DepthwiseConv2D operator");
+ add_switch(arser, "--replace_sub_with_add", "This will replace sub with add operator");
+ add_switch(arser, "--resolve_customop_add", "This will convert Custom(Add) to Add operator");
+ add_switch(arser, "--resolve_customop_batchmatmul",
+ "This will convert Custom(BatchMatmul) to BatchMatmul operator");
+ add_switch(arser, "--resolve_customop_matmul",
+ "This will convert Custom(Matmul) to Matmul operator");
+ add_switch(arser, "--resolve_customop_max_pool_with_argmax",
+ "This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators");
+ add_switch(arser, "--resolve_customop_splitv",
+ "This will convert Custom(SplitV) to SplitV operator");
+ add_switch(arser, "--shuffle_weight_to_16x1float32",
+ "This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
+ "it only converts weights whose row is a multiple of 16");
+ add_switch(arser, "--replace_non_const_fc_with_batch_matmul",
+ "Replace FullyConnected with BatchMatMul when its weight is non-constant");
+ add_switch(arser, "--substitute_pack_to_reshape",
+ "This will convert single input Pack to Reshape");
+ add_switch(arser, "--substitute_padv2_to_pad",
+ "This will convert certain condition PadV2 to Pad");
+ add_switch(arser, "--substitute_splitv_to_split",
+ "This will convert certain condition SplitV to Split operator");
+ add_switch(arser, "--substitute_squeeze_to_reshape",
+ "This will convert certain condition Squeeze to Reshape");
+ add_switch(arser, "--substitute_strided_slice_to_reshape",
+ "This will convert certain condition Strided_Slice to Reshape");
+ add_switch(arser, "--substitute_transpose_to_reshape",
+ "This will convert single input Transpose to Reshape");
+ add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
+ add_switch(arser, "--convert_nchw_to_nhwc",
+ "Experimental: This will convert NCHW operators to NHWC under the assumption that "
+ "input model is NCHW.");
+ add_switch(arser, "--nchw_to_nhwc_input_shape",
+ "Convert the input shape of the model (argument for --convert_nchw_to_nhwc).");
+ add_switch(arser, "--nchw_to_nhwc_output_shape",
+ "Convert the output shape of the model (argument for --convert_nchw_to_nhwc).");
+ add_switch(arser, "--transform_min_max_to_relu6",
+ "Transform Minimum(6)-Maximum(0) pattern to Relu6 operator");
+ add_switch(arser, "--transform_min_relu_to_relu6",
+ "Transform Minimum(6)-Relu pattern to Relu6 operator");
+ add_switch(arser, "--mute_warnings", "This will turn off warning messages");
+ add_switch(arser, "--disable_validation",
+ "This will turn off operator validations. May help input model investigation.");
+ add_switch(arser, "--generate_profile_data", "This will turn on profiling data generation.");
arser.add_argument("--change_outputs")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Experimental: Change first subgraph output nodes to CSV names");
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
// sparsification argument
- arser.add_argument("--sparsify_tensor")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Tensor name that you want to sparsify");
+ arser.add_argument("--sparsify_tensor").help("Tensor name that you want to sparsify");
arser.add_argument("--sparsify_traversal_order")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("0,1,2,3")
.help("Traversal order of dimensions. Default value: 0,1,2,3");
arser.add_argument("--sparsify_format")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("d,s")
.help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
"value: d,s");
- arser.add_argument("--sparsify_block_size")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Size of each block dimension");
+ arser.add_argument("--sparsify_block_size").help("Size of each block dimension");
arser.add_argument("--sparsify_block_map")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("0,1")
.help("Map from block dimension to the original tensor dimension. Default value: 0,1");
@@ -446,20 +206,12 @@ int entry(int argc, char **argv)
// If REPLACE is zero, it does not overwrite an existing value.
setenv("LUCI_LOG", "100", 0);
}
- if (arser.get<bool>("--O1"))
- {
- options->enable(Algorithms::FuseBCQ);
- options->enable(Algorithms::FuseInstanceNorm);
- options->enable(Algorithms::ResolveCustomOpAdd);
- options->enable(Algorithms::ResolveCustomOpBatchMatMul);
- options->enable(Algorithms::ResolveCustomOpMatMul);
- options->enable(Algorithms::RemoveRedundantTranspose);
- options->enable(Algorithms::SubstitutePackToReshape);
- }
if (arser.get<bool>("--fold_add_v2"))
options->enable(Algorithms::FoldAddV2);
if (arser.get<bool>("--fold_cast"))
options->enable(Algorithms::FoldCast);
+ if (arser.get<bool>("--fold_densify"))
+ options->enable(Algorithms::FoldDensify);
if (arser.get<bool>("--fold_dequantize"))
options->enable(Algorithms::FoldDequantize);
if (arser.get<bool>("--fold_dwconv"))
@@ -524,8 +276,12 @@ int entry(int argc, char **argv)
options->enable(Algorithms::ResolveCustomOpMatMul);
if (arser.get<bool>("--resolve_customop_max_pool_with_argmax"))
options->enable(Algorithms::ResolveCustomOpMaxPoolWithArgmax);
+ if (arser.get<bool>("--resolve_customop_splitv"))
+ options->enable(Algorithms::ResolveCustomOpSplitV);
if (arser.get<bool>("--shuffle_weight_to_16x1float32"))
options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ if (arser.get<bool>("--replace_non_const_fc_with_batch_matmul"))
+ options->enable(Algorithms::ReplaceNonConstFCWithBatchMatMul);
if (arser.get<bool>("--substitute_pack_to_reshape"))
options->enable(Algorithms::SubstitutePackToReshape);
if (arser.get<bool>("--substitute_padv2_to_pad"))
@@ -595,37 +351,11 @@ int entry(int argc, char **argv)
csv_tokenize(csv_nodes, new_outputs);
}
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data;
-
- try
- {
- model_data = file_loader.load();
- }
- catch (const std::runtime_error &err)
- {
- std::cerr << err.what() << std::endl;
- return EXIT_FAILURE;
- }
-
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
- {
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
// Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(input_path);
+ if (module.get() == nullptr)
+ return EXIT_FAILURE;
if (change_outputs)
{
diff --git a/compiler/circlechef/tools/file/Driver.cpp b/compiler/circlechef/tools/file/Driver.cpp
index 76d0f3f7f..9c4256b40 100644
--- a/compiler/circlechef/tools/file/Driver.cpp
+++ b/compiler/circlechef/tools/file/Driver.cpp
@@ -28,10 +28,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
- arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path");
+ arser.add_argument("recipe").help("Source recipe file path to convert");
+ arser.add_argument("circle").help("Target circle file path");
try
{
diff --git a/compiler/circlechef/tools/reverse/Driver.cpp b/compiler/circlechef/tools/reverse/Driver.cpp
index 639e0af6f..c8ef07c6f 100644
--- a/compiler/circlechef/tools/reverse/Driver.cpp
+++ b/compiler/circlechef/tools/reverse/Driver.cpp
@@ -25,10 +25,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle")
- .type(arser::DataType::STR)
- .help("Source circle file path to convert");
- arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
+ arser.add_argument("circle").help("Source circle file path to convert");
+ arser.add_argument("recipe").help("Target recipe file path");
try
{
diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt
index b65c06677..7485ff8e7 100644
--- a/compiler/circledump/CMakeLists.txt
+++ b/compiler/circledump/CMakeLists.txt
@@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circledump ${DRIVER} ${SOURCES})
target_include_directories(circledump PRIVATE include)
target_link_libraries(circledump arser)
+target_link_libraries(circledump foder)
target_link_libraries(circledump mio_circle04)
target_link_libraries(circledump mio_circle04_helper)
target_link_libraries(circledump safemain)
diff --git a/compiler/circledump/driver/Driver.cpp b/compiler/circledump/driver/Driver.cpp
index 657f24fe0..5b0871a91 100644
--- a/compiler/circledump/driver/Driver.cpp
+++ b/compiler/circledump/driver/Driver.cpp
@@ -15,7 +15,7 @@
*/
#include <arser/arser.h>
-#include <circleread/Model.h>
+#include <foder/FileLoader.h>
#include <circledump/Dump.h>
#include <iostream>
@@ -23,7 +23,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to dump");
+ arser.add_argument("circle").help("Circle file path to dump");
try
{
@@ -38,14 +38,10 @@ int entry(int argc, char **argv)
std::string circle_path = arser.get<std::string>("circle");
// Load Circle model from a circle file
- std::unique_ptr<circleread::Model> model = circleread::load_circle(circle_path);
- if (model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl;
- return 255;
- }
-
- const circle::Model *circlemodel = model->model();
+ foder::FileLoader fileLoader{circle_path};
+ std::vector<char> modelData = fileLoader.load();
+ const circle::Model *circlemodel = circle::GetModel(modelData.data());
+ // const circle::Model *circlemodel = model->model();
if (circlemodel == nullptr)
{
std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl;
diff --git a/compiler/circledump/include/circleread/Model.h b/compiler/circledump/include/circleread/Model.h
deleted file mode 100644
index 234db8b4c..000000000
--- a/compiler/circledump/include/circleread/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEREAD_MODEL_H__
-#define __CIRCLEREAD_MODEL_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-
-namespace circleread
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::circle::Model *model(void) const = 0;
-};
-
-/**
- * @brief Load Circle model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_circle(const std::string &path);
-
-} // namespace circleread
-
-#endif // __CIRCLEREAD_MODEL_H__
diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake
index 362d67cf4..183dfe227 100644
--- a/compiler/circledump/requires.cmake
+++ b/compiler/circledump/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
+require("foder")
require("mio-circle04")
require("safemain")
diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp
index 0b256dda8..69427a20e 100644
--- a/compiler/circledump/src/Dump.cpp
+++ b/compiler/circledump/src/Dump.cpp
@@ -16,8 +16,8 @@
#include <circledump/Dump.h>
#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
-#include "Read.h"
#include "OpPrinter.h"
#include "MetadataPrinter.h"
@@ -122,7 +122,7 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
return os;
}
-void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
+void dump_sub_graph(std::ostream &os, mio::circle::Reader &reader)
{
auto tensors = reader.tensors();
auto operators = reader.operators();
@@ -150,14 +150,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
std::vector<int32_t> dims = {-1};
if (tensor->shape())
- dims = circleread::as_index_vector(tensor->shape());
+ dims = mio::circle::as_index_vector(tensor->shape());
os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor)
<< " ";
os << "(" << dims << ") ";
if (tensor->shape_signature())
{
- std::vector<int32_t> dims_sig = circleread::as_index_vector(tensor->shape_signature());
+ std::vector<int32_t> dims_sig = mio::circle::as_index_vector(tensor->shape_signature());
os << "(" << dims_sig << ") ";
}
os << "B(" << tensor->buffer() << ") ";
@@ -299,8 +299,8 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
const auto op = operators->Get(i);
circle::BuiltinOperator builtincode = reader.builtin_code(op);
- const std::vector<int32_t> &inputs = circleread::as_index_vector(op->inputs());
- const std::vector<int32_t> &outputs = circleread::as_index_vector(op->outputs());
+ const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs());
+ const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs());
auto op_name = reader.opcode_name(op);
os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " ";
@@ -356,7 +356,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
void dump_model(std::ostream &os, const circle::Model *model)
{
- circleread::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp
deleted file mode 100644
index 67e7fa5a6..000000000
--- a/compiler/circledump/src/Load.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <circleread/Model.h>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-namespace
-{
-
-class MemoryMappedModel final : public circleread::Model
-{
-public:
- /**
- * @require fd and data SHOULD be valid
- */
- explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- ~MemoryMappedModel()
- {
- munmap(_data, _size);
- close(_fd);
- }
-
-public:
- MemoryMappedModel(const MemoryMappedModel &) = delete;
- MemoryMappedModel(MemoryMappedModel &&) = delete;
-
-public:
- const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); }
-
-private:
- int _fd = -1;
- void *_data = nullptr;
- size_t _size = 0;
-};
-
-class FileDescriptor final
-{
-public:
- FileDescriptor(int value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- // NOTE Copy is not allowed
- FileDescriptor(const FileDescriptor &) = delete;
-
-public:
- // NOTE Move is allowed
- FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
-
-public:
- ~FileDescriptor()
- {
- if (_value != -1)
- {
- // Close on destructor
- close(_value);
- }
- }
-
-public:
- int value(void) const { return _value; }
-
-public:
- int release(void)
- {
- auto res = _value;
- _value = -1;
- return res;
- }
-
-private:
- int _value = -1;
-};
-
-} // namespace
-
-namespace circleread
-{
-
-std::unique_ptr<Model> load_circle(const std::string &path)
-{
- FileDescriptor fd = open(path.c_str(), O_RDONLY);
-
- if (fd.value() == -1)
- {
- // Return nullptr on open failure
- return nullptr;
- }
-
- struct stat st;
- if (fstat(fd.value(), &st) == -1)
- {
- // Return nullptr on fstat failure
- return nullptr;
- }
-
- auto size = st.st_size;
- auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
-
- if (data == MAP_FAILED)
- {
- // Return nullptr on mmap failure
- return nullptr;
- }
-
- return std::unique_ptr<circleread::Model>{new MemoryMappedModel(fd.release(), data, size)};
-}
-
-} // namespace circleread
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
index 02e5c26b5..817371dcf 100644
--- a/compiler/circledump/src/OpPrinter.cpp
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -15,7 +15,8 @@
*/
#include "OpPrinter.h"
-#include "Read.h"
+
+#include <mio_circle/Helper.h>
#include <memory>
@@ -233,7 +234,7 @@ public:
{
if (auto *reshape_params = op->builtin_options_as_ReshapeOptions())
{
- auto new_shape = circleread::as_index_vector(reshape_params->new_shape());
+ auto new_shape = mio::circle::as_index_vector(reshape_params->new_shape());
os << " ";
os << "NewShape(" << new_shape << ")";
os << std::endl;
@@ -802,6 +803,7 @@ OpPrinterRegistry::OpPrinterRegistry()
// There is no Option for CEIL
_op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[circle::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ // There is no Option for DENSIFY
_op_map[circle::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>();
_op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp
deleted file mode 100644
index 3a7e98cde..000000000
--- a/compiler/circledump/src/Read.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Read.h"
-
-#include <mio_circle/Helper.h>
-
-#include <sstream>
-#include <string>
-
-namespace circleread
-{
-
-Reader::Reader(const circle::Model *model)
-{
- _version = model->version();
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
- _metadata = model->metadata();
- _signature_defs = model->signature_defs();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- *buff_data = nullptr;
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return opcode->builtin_code();
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!mio::circle::is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return mio::circle::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _subgraph_index = sgindex;
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- auto name = subgraph->name();
- _subgraph_name = name ? name->c_str() : "(noname)";
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
- _data_format = subgraph->data_format();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circleread
diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h
deleted file mode 100644
index 05b0e5072..000000000
--- a/compiler/circledump/src/Read.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEREAD_READ_H__
-#define __CIRCLEREAD_READ_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circleread
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- if (flat_array == nullptr)
- {
- throw std::runtime_error("flat array is nullptr");
- }
-
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
- using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>;
- using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- uint32_t version() const { return _version; }
-
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
- const circle::DataFormat &data_format() const { return _data_format; }
- const CircleMetadata_t *metadata() const { return _metadata; }
- const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
- const std::string &subgraph_name(void) const { return _subgraph_name; }
- uint32_t subgraph_index(void) const { return _subgraph_index; }
-
-private:
- uint32_t _version;
-
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
- const CircleMetadata_t *_metadata{nullptr};
- const CircleSignatureDef_t *_signature_defs{nullptr};
-
- uint32_t _subgraph_index = 0;
- std::string _subgraph_name;
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
- circle::DataFormat _data_format = circle::DataFormat::DataFormat_CHANNELS_FIRST;
-};
-
-} // namespace circleread
-
-#endif // __CIRCLEREAD_READ_H__
diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt
index 0fb99ddba..4ab0ea218 100644
--- a/compiler/cli/CMakeLists.txt
+++ b/compiler/cli/CMakeLists.txt
@@ -10,5 +10,5 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest QUIET)
-GTest_AddTEst(cli_test ${TESTS})
+GTest_AddTest(cli_test ${TESTS})
target_link_libraries(cli_test cli)
diff --git a/compiler/coco/core/src/IR/Module.cpp b/compiler/coco/core/src/IR/Module.cpp
index 420cf6f0c..0db78941c 100644
--- a/compiler/coco/core/src/IR/Module.cpp
+++ b/compiler/coco/core/src/IR/Module.cpp
@@ -144,7 +144,7 @@ std::unique_ptr<Module> Module::create(void)
m->_input = make_unique<coco::InputList>();
m->_output = make_unique<coco::OutputList>();
- return std::move(m);
+ return m;
}
} // namespace coco
diff --git a/compiler/coco/generic/src/IR/Data.cpp b/compiler/coco/generic/src/IR/Data.cpp
index 5ab7069ee..361dcc243 100644
--- a/compiler/coco/generic/src/IR/Data.cpp
+++ b/compiler/coco/generic/src/IR/Data.cpp
@@ -209,8 +209,7 @@ std::unique_ptr<Data> Data::create(void)
data->_blob = std::move(blob);
data->_fp32 = std::move(fp32);
- // GCC 4.9 tries to copy data (while GCC 6.X doesn't)
- return std::move(data);
+ return data;
}
} // namespace coco
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index 404149c15..34a3a4d7d 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -12,14 +12,6 @@ if(${PYTHON_VERSION_MINOR} LESS 8)
return()
endif()
-# Create python virtual environment with tensorflow 2.6.0
-set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0")
-
-add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0}
-)
-
# Create python virtual environment with tensorflow 2.8.0
set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0")
@@ -30,33 +22,36 @@ add_custom_command(
# Create requirements.txt and install required pip packages
set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}")
-add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
-)
+set(PYTHON_OVERLAY python3)
+if(PYTHON_EXECUTABLE MATCHES python3.8)
+ set(PYTHON_OVERLAY python3.8)
+endif()
+# NOTE when using behind proxy with self signed certificate, need to set '--trusted-host' options
+set(PIP_OPTION_TRUSTED_HOST )
+if(DEFINED ENV{ONE_PIP_OPTION_TRUST_HOST})
+ set(PIP_OPTION_TRUSTED_HOST --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --trusted-host pypi.org)
+endif()
+
+# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
+# TODO remove protobuf==3.20.1 when issue is resolved
add_custom_command(
OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade
+ COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.20.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0}
)
add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
- ${VIRTUALENV_OVERLAY_TF_2_8_0}
- ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0}
${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
)
@@ -246,7 +241,13 @@ foreach(RECIPE IN ITEMS ${RECIPES})
if(NOT DEFINED NO_OPTIMIZE_${RECIPE})
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> --O1 ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ # NOTE --resolve_customop_add is just to added for old -O1, no particular meaning
+ # --fold_dequantize is added to fold Tensor(FLOAT16) + DEQUANTIZE (Net_Dequantize_Add)
+ # model. FLOAT16 in general is NOT supported but only Tensor(FLOAT16) + DEQUANTIZE
+ # sequence accepted as folded to Tensor(FLOAT32).
+ # TODO revise giving options from the list file
+ COMMAND $<TARGET_FILE:circle2circle> --resolve_customop_add --fold_dequantize --fold_densify
+ ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst
index 92b07fde8..2275a42d9 100644
--- a/compiler/common-artifacts/exclude.lst
+++ b/compiler/common-artifacts/exclude.lst
@@ -32,6 +32,7 @@ tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
tcgenerate(Ceil_000)
tcgenerate(Conv2D_003) # runtime doesn't support dilation
tcgenerate(Cos_000)
+tcgenerate(Densify_000) # luci-interpreter doesn't support
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
@@ -67,6 +68,8 @@ tcgenerate(Neg_000)
tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet
tcgenerate(Net_Dangle_001)
+tcgenerate(Net_Densify_Add_000) # luci-interpreter doesn't support Densify yet
+tcgenerate(Net_Densify_Dequantize_Add_000) # luci-interpreter doesn't support Densify/Dequantize yet
tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(OneHot_000)
diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp
index 33cecbbe2..7481050c5 100644
--- a/compiler/common-artifacts/src/TestDataGenerator.cpp
+++ b/compiler/common-artifacts/src/TestDataGenerator.cpp
@@ -142,23 +142,15 @@ void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
- arser.add_argument("--input_data")
- .required(true)
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Path to generate input data h5 file");
+ arser.add_argument("circle").help("Circle file you want to test");
+ arser.add_argument("--input_data").required(true).help("Path to generate input data h5 file");
arser.add_argument("--expected_data")
.required(true)
- .nargs(1)
- .type(arser::DataType::STR)
.help("Path to generate expected data h5 file");
arser.add_argument("--fixed_seed")
- .required(false)
.nargs(0)
.help("Put a fixed seed into the random number generator");
arser.add_argument("--input_range")
- .required(false)
.nargs(3)
.type(arser::DataType::STR_VEC)
.help("Set random number range [min max] for the input as 'name min max'");
diff --git a/compiler/crew/CMakeLists.txt b/compiler/crew/CMakeLists.txt
index 1824d86ab..45cda7562 100644
--- a/compiler/crew/CMakeLists.txt
+++ b/compiler/crew/CMakeLists.txt
@@ -12,9 +12,12 @@ if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
+configure_file("src/test_read_semicolon.ini" "test_read_semicolon.ini" COPYONLY)
+
nnas_find_package(GTest REQUIRED)
GTest_AddTest(crew_test ${TESTS})
target_include_directories(crew_test PRIVATE src)
target_link_libraries(crew_test nncc_common)
target_link_libraries(crew_test crew)
+target_link_libraries(crew_test foder)
diff --git a/compiler/crew/src/PConfigIni.cpp b/compiler/crew/src/PConfigIni.cpp
index f0e3e8e01..5177843bf 100644
--- a/compiler/crew/src/PConfigIni.cpp
+++ b/compiler/crew/src/PConfigIni.cpp
@@ -26,10 +26,36 @@
#include <sstream>
#include <stdexcept>
#include <string>
+#include <vector>
namespace crew
{
+namespace
+{
+
+std::string filter_escape(const std::string &source)
+{
+ std::string key = source;
+
+ // if key is surrounded with quotation
+ // TODO for quotation
+
+ // if key has '\\' + ';', remove '\\'
+ auto pos = key.find("\\;");
+ while (pos != std::string::npos)
+ {
+ auto k1 = key.substr(0, pos);
+ auto k2 = key.substr(pos + 1);
+ key = k1 + k2;
+ pos = key.find("\\;");
+ }
+
+ return key;
+}
+
+} // namespace
+
Sections read_ini(const char *data, size_t length)
{
assert(data != nullptr);
@@ -84,6 +110,7 @@ Sections read_ini(const char *data, size_t length)
{
auto key = string_line.substr(0, pos);
auto val = string_line.substr(pos + 1);
+ key = filter_escape(key);
section.items.emplace(key, val);
}
}
@@ -107,11 +134,53 @@ Sections read_ini(const std::string &path)
return read_ini(ini_data.data(), ini_data.size());
}
+namespace
+{
+
+void replace(std::string &source, const std::string &token, const std::string &replace)
+{
+ size_t pos = 0;
+ while ((pos = source.find(token, pos)) != std::string::npos)
+ {
+ source.replace(pos, token.length(), replace);
+ pos += replace.length(); // Handles the case where 'replace' is a substring of 'token'
+ }
+}
+
+Sections insert_escape(const Sections &inputs)
+{
+ Sections sections;
+
+ // for all section in sections;
+ // if key has ';' then replace with '\;'
+ for (auto &input : inputs)
+ {
+ Section section;
+ section.name = input.name;
+
+ for (auto &item : input.items)
+ {
+ auto key = item.first;
+ auto value = item.second;
+
+ replace(key, ";", "\\;");
+ section.items[key] = value;
+ }
+ sections.push_back(section);
+ }
+
+ return sections;
+}
+
+} // namespace
+
void write_ini(std::ostream &os, const Sections &sections)
{
std::stringstream ss;
- ss << sections;
+ auto processed = insert_escape(sections);
+
+ ss << processed;
std::string strss = ss.str();
diff --git a/compiler/crew/src/PConfigIni.test.cpp b/compiler/crew/src/PConfigIni.test.cpp
index bdd2ccc1f..c062c6937 100644
--- a/compiler/crew/src/PConfigIni.test.cpp
+++ b/compiler/crew/src/PConfigIni.test.cpp
@@ -17,12 +17,14 @@
#include "crew/PConfigIni.h"
#include "crew/PConfigIniDump.h"
+#include <foder/FileLoader.h>
+
#include <gtest/gtest.h>
#include <sstream>
#include <stdexcept>
-TEST(ConfigIniTest, read_ini_non_exist_file)
+TEST(ConfigIniTest, read_ini_non_exist_file_NEG)
{
EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error);
}
@@ -85,3 +87,60 @@ TEST(ConfigIniTest, write_ini_file_error_NEG)
crew::Sections sections;
EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error);
}
+
+TEST(ConfigIniTest, read_file_escape_semicolon)
+{
+ auto sections = crew::read_ini("test_read_semicolon.ini");
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+
+ EXPECT_TRUE("keya;keyb;keyc;keyd" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, write_file_escape_semicolon)
+{
+ std::string path("test_write_semicolon.ini");
+
+ // save key with ';'
+ {
+ crew::Sections sections;
+ crew::Section hello;
+ hello.name = "hello";
+ hello.items["keya;keyb;keyc;keyd"] = "world";
+ sections.push_back(hello);
+ crew::write_ini(path, sections);
+ }
+
+ // load the file and check if there is '\\'
+ std::string strbuffer;
+ {
+ foder::FileLoader file_loader{path};
+ auto ini_data = file_loader.load();
+
+ auto buffer = std::vector<char>();
+ auto length = ini_data.size();
+ buffer.reserve(length + 1);
+
+ char *pbuffer = buffer.data();
+ memcpy(pbuffer, ini_data.data(), length);
+ *(pbuffer + length) = 0;
+
+ strbuffer = pbuffer;
+ }
+ int32_t count = 0;
+ size_t pos = 0;
+ while ((pos = strbuffer.find("\\;", pos)) != std::string::npos)
+ {
+ count++;
+ pos++;
+ }
+ EXPECT_TRUE(count == 3);
+}
diff --git a/compiler/crew/src/test_read_semicolon.ini b/compiler/crew/src/test_read_semicolon.ini
new file mode 100644
index 000000000..d966fb707
--- /dev/null
+++ b/compiler/crew/src/test_read_semicolon.ini
@@ -0,0 +1,2 @@
+[hello]
+keya\;keyb\;keyc\;keyd=world
diff --git a/compiler/enco/core/src/CppGen/Host.cpp b/compiler/enco/core/src/CppGen/Host.cpp
index 7f9456239..63baf0b31 100644
--- a/compiler/enco/core/src/CppGen/Host.cpp
+++ b/compiler/enco/core/src/CppGen/Host.cpp
@@ -299,7 +299,7 @@ std::unique_ptr<pp::MultiLineText> HostBlockCompiler::compile(const coco::Block
res->append(ins->accept(prn));
}
- return std::move(res);
+ return res;
}
} // namespace enco
diff --git a/compiler/enco/core/src/CppGen/Subnet.cpp b/compiler/enco/core/src/CppGen/Subnet.cpp
index 599b0794e..3fc14edf5 100644
--- a/compiler/enco/core/src/CppGen/Subnet.cpp
+++ b/compiler/enco/core/src/CppGen/Subnet.cpp
@@ -373,7 +373,7 @@ std::unique_ptr<SubnetStruct> SubnetStructBuilder::build(const ANNBinder *binder
// Finalize compilation
res->ctor()->append("ANeuralNetworksCompilation_finish(", cname, ");");
- return std::move(res);
+ return res;
}
std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder *binder) const
@@ -415,7 +415,7 @@ std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder
res->append("ANeuralNetworksExecution_free(execution);");
- return std::move(res);
+ return res;
}
} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/Split.cpp b/compiler/enco/core/src/Transforms/Split.cpp
index 714c27a72..4bb21b0a7 100644
--- a/compiler/enco/core/src/Transforms/Split.cpp
+++ b/compiler/enco/core/src/Transforms/Split.cpp
@@ -656,7 +656,7 @@ public:
app->ofm(ofm);
app->ker(ker);
- return std::move(app);
+ return app;
}
else
{
@@ -676,7 +676,7 @@ public:
app->ofm(ofm);
app->ker(ker);
- return std::move(app);
+ return app;
}
}
}
@@ -704,7 +704,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asMul())
@@ -731,7 +731,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asPadF())
@@ -754,7 +754,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto maxpool = eval->op()->asMaxPool2D())
@@ -779,7 +779,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto avgpool = eval->op()->asAvgPool2D())
@@ -808,7 +808,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
}
@@ -831,7 +831,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto relu6 = eval->op()->asReLU6())
@@ -853,7 +853,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asConcatF())
@@ -880,7 +880,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asSub())
@@ -907,7 +907,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asDiv())
@@ -934,7 +934,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
@@ -967,7 +967,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
app->left(depth_concat->fst()->asFeature());
app->right(depth_concat->snd()->asFeature());
- return std::move(app);
+ return app;
}
// Build ANN IR from ANNConv2D instruction
@@ -986,7 +986,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
app->ker(conv2d->ker()->asKernel());
app->bias(coco::safe_cast<coco::FeatureObject>(conv2d->bias()));
- return std::move(app);
+ return app;
}
return nullptr;
diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
index aa2cad705..32ad44385 100644
--- a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
+++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
@@ -25,6 +25,8 @@
#include <loco/Service/TypeInference.h>
#include <loco/Service/ShapeInference.h>
+#include <limits>
+
namespace exo
{
diff --git a/compiler/kuma/src/IntervalSet.h b/compiler/kuma/src/IntervalSet.h
index 3b6c5f666..1e26581c0 100644
--- a/compiler/kuma/src/IntervalSet.h
+++ b/compiler/kuma/src/IntervalSet.h
@@ -17,6 +17,7 @@
#ifndef __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
#define __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
+#include <cstdint>
#include <map>
namespace kuma
diff --git a/compiler/loco/include/loco/IR/DataTypeTraits.h b/compiler/loco/include/loco/IR/DataTypeTraits.h
index 1f78c9fec..6be46c3b3 100644
--- a/compiler/loco/include/loco/IR/DataTypeTraits.h
+++ b/compiler/loco/include/loco/IR/DataTypeTraits.h
@@ -83,6 +83,13 @@ template <> struct DataTypeImpl<DataType::U64>
using Type = uint64_t;
};
+template <> struct DataTypeImpl<DataType::FLOAT16>
+{
+ // float16 type with 16bit value, encoded with help of FP16 library
+ // https://github.com/Maratyszcza/FP16/
+ using Type = uint16_t;
+};
+
template <> struct DataTypeImpl<DataType::FLOAT32>
{
// Use C++ float type for IEEE 32-bit floating-point numbers
@@ -132,6 +139,8 @@ inline uint32_t size(DataType data_type)
return sizeof(DataTypeImpl<DataType::S64>::Type);
case DataType::U64:
return sizeof(DataTypeImpl<DataType::U64>::Type);
+ case DataType::FLOAT16:
+ return sizeof(DataTypeImpl<DataType::FLOAT16>::Type);
case DataType::FLOAT32:
return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
case DataType::FLOAT64:
diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
index 500f08623..40ddb133b 100644
--- a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
+++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
@@ -122,9 +122,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
{
using namespace loco;
- auto encoder = encode_node->encoder();
- assert(encoder != nullptr);
-
auto decode_node = dynamic_cast<loco::FeatureDecode *>(encode_node->input());
if (decode_node == nullptr)
{
@@ -132,6 +129,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
}
assert(decode_node->input() != nullptr);
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
auto decoder = decode_node->decoder();
assert(decoder != nullptr);
@@ -302,9 +302,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
{
using namespace loco;
- auto encoder = encode_node->encoder();
- assert(encoder != nullptr);
-
auto decode_node = dynamic_cast<loco::MatrixDecode *>(encode_node->input());
if (decode_node == nullptr)
{
@@ -312,6 +309,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
}
assert(decode_node->input() != nullptr);
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
auto decoder = decode_node->decoder();
assert(decoder != nullptr);
diff --git a/compiler/luci-eval-driver/src/EvalDriver.cpp b/compiler/luci-eval-driver/src/EvalDriver.cpp
index 4762cffe7..0ed35431d 100644
--- a/compiler/luci-eval-driver/src/EvalDriver.cpp
+++ b/compiler/luci-eval-driver/src/EvalDriver.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci_interpreter/Interpreter.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
@@ -47,18 +47,6 @@ void writeDataToFile(const std::string &filename, const char *data, size_t data_
}
}
-std::unique_ptr<luci::Module> importModel(const std::string &filename)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- {
- throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
- }
- std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
- std::istreambuf_iterator<char>());
- return luci::Importer().importModule(circle::GetModel(model_data.data()));
-}
-
template <typename NodeT> size_t getTensorSize(const NodeT *node)
{
uint32_t tensor_size = loco::size(node->dtype());
@@ -91,7 +79,8 @@ int entry(int argc, char **argv)
const char *output_file = argv[4];
// Load model from the file
- std::unique_ptr<luci::Module> module = importModel(filename);
+ luci::ImporterEx importer;
+ std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename);
if (module == nullptr)
{
std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
index d134a6b95..f0df58db3 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -12,6 +12,7 @@ REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
REGISTER_KERNEL(Softmax)
REGISTER_KERNEL(SpaceToBatchND)
REGISTER_KERNEL(SpaceToDepth)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
index 15ff0327b..efa6b167e 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -18,7 +18,7 @@
#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
index 6046789ae..effb85d54 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -17,7 +17,7 @@
#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h
new file mode 100644
index 000000000..813b1ec2c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h
@@ -0,0 +1,1568 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T lower = 0;
+ const T clamped = val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T upper = 1;
+ const T lower = -1;
+ const T clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const float upper = 6;
+ const float lower = 0;
+ const float clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const int32 val = static_cast<int32_t>(input_data[i]);
+ int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+ params.output_multiplier,
+ params.output_shift);
+ clamped = std::max(params.quantized_activation_min, clamped);
+ clamped = std::min(params.quantized_activation_max, clamped);
+ output_data[i] = static_cast<T>(clamped);
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ const T max_value = params.quantized_activation_max;
+ const T min_value = params.quantized_activation_min;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+ output_data[i] = clamped;
+ }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+ const RuntimeShape &unswitched_input1_shape,
+ const uint8 *unswitched_input1_data,
+ const RuntimeShape &unswitched_input2_shape,
+ const uint8 *unswitched_input2_data,
+ const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ArithmeticParams switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+
+ const bool use_unswitched = unswitched_params.broadcast_category ==
+ tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+ const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+ // sections of the arrays.
+ uint8 *output_data_ptr = output_data;
+ const uint8 *input1_data_ptr = input1_data;
+ const uint8 *input2_data_reset = input2_data;
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const uint8 *input2_data_ptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ input1_data_ptr += y4;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16");
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ output_data[i] = unclamped_result.raw();
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+ int32 output_offset = params.output_offset;
+ int32 output_activation_min = params.quantized_activation_min;
+ int32 output_activation_max = params.quantized_activation_max;
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+ int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+ clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+ output_data[i] = output_offset + clamped_result;
+ }
+}
+
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16_t *input1_data, const RuntimeShape &input2_shape,
+ const int16_t *input2_data, const RuntimeShape &output_shape,
+ int16_t *output_data)
+{
+ ruy::profiler::ScopeLabel label("Sub/Int16");
+ const int input1_shift = params.input1_shift;
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ const int16 output_activation_min = params.quantized_activation_min;
+ const int16 output_activation_max = params.quantized_activation_max;
+
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+ TFLITE_DCHECK_LE(input1_shift, 0);
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
+ const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+ const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+ const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+ if (input1_shift == 0)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+ else
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+}
+
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Pack");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+ ruy::profiler::ScopeLabel label("Unpack");
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
+ int outer_size = 1;
+ int axis = params.axis;
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, dimensions);
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ Scalar *output_ptr = output_datas[i] + copy_size * k;
+ int loc = k * outputs_count * copy_size + i * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const uint8 *const *input_data, const RuntimeShape &output_shape,
+ uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("PackWithScaling");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32 *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ Scalar *output_ptr = output_data;
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ }
+ else
+ {
+ assert(false);
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ auto input_ptr = input_data[i];
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape,
+ Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("DepthConcatenation");
+ auto params_copy = params;
+ params_copy.axis = 3;
+ Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+ const float *prev_activ_data, const RuntimeShape &weights_shape,
+ const float *weights_data, const RuntimeShape &unextended_bias_shape,
+ const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+ const float *prev_state_data,
+ const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+ const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+ const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+ const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+ output_state_shape, 0, output_activ_shape, 0);
+ const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+ output_state_shape, 1, output_activ_shape, 1);
+ const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+ output_state_shape, 2, output_activ_shape, 2);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+ // Concatenate prev_activ and input data together
+ std::vector<float const *> concat_input_arrays_data;
+ std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+ concat_input_arrays_data.push_back(input_data);
+ concat_input_arrays_data.push_back(prev_activ_data);
+ concat_input_arrays_shapes.push_back(&input_shape);
+ concat_input_arrays_shapes.push_back(&prev_activ_shape);
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = concat_input_arrays_data.size();
+ Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+ concat_temp_shape, concat_temp_data);
+
+ // Fully connected
+ tflite::FullyConnectedParams fc_params;
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+ bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+ // Memory state update (the LSTM "guts")
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int w = 0; w < width; ++w)
+ {
+ for (int h = 0; h < height; ++h)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ const float input_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+ const float new_input =
+ std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+ const float forget_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+ const float output_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+ const float new_state =
+ input_gate * new_input +
+ forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+ output_gate * std::tanh(new_state);
+ }
+ }
+ }
+ }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
// The rationale for that is that this is the natural interval for output
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is obviously quantized as
+// the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
template <int StateIntegerBits>
inline void
LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
         int16 *activ_temp_data_int16, void *gemmlowp_context)
{
  (void)gemmlowp_context; // only used in optimized code.
  int32 weights_zero_point = params.weights_zero_point;
  int32 accum_multiplier = params.accum_multiplier;
  int accum_shift = params.accum_shift;
  // Shapes may be given with rank <= 4; normalize all of them to rank 4
  // before indexing.
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
  const RuntimeShape output_state_shape =
    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
  const RuntimeShape output_activ_shape =
    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
  const RuntimeShape concat_temp_shape =
    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);

  // Gather dimensions information, and perform consistency checks.
  const int weights_dim_count = weights_shape.DimensionsCount();
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
                                                 output_state_shape, output_activ_shape);
  const int input_depth = input_shape.Dims(3);
  const int prev_activ_depth = prev_activ_shape.Dims(3);
  const int total_input_depth = prev_activ_depth + input_depth;
  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
  // The internal activations feed 4 gates, hence a multiple of 4.
  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
                                       3, output_activ_shape, 3);
  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
  const int fc_output_depth =
    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
  const int fc_accum_depth = total_input_depth;
  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);

  // Depth-concatenate prev_activ and input data together.
  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
  tflite::ConcatenationParams concat_params;
  concat_params.axis = 3;
  concat_params.inputs_count = 2;
  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
                concat_temp_shape, concat_temp_data_uint8);

  // Implementation of the fully connected node inside the LSTM cell.
  // The operands are 8-bit integers, the accumulators are internally 32bit
  // integers, and the output is 16-bit fixed-point with 3 integer bits so
  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
  // is explained in the function comment above.
  for (int b = 0; b < fc_batches; ++b)
  {
    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
    {
      // Internal accumulation.
      // Initialize accumulator with the bias-value.
      int32 accum = bias_data_int32[out_c];
      // Accumulation loop.
      for (int d = 0; d < fc_accum_depth; ++d)
      {
        // Inputs are uint8 with a fixed zero point of 128 (see the function
        // comment: range [-1, 127/128]); weights use the runtime-provided
        // weights_zero_point.
        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
        accum += input_val * weights_val;
      }
      // Down-scale the final int32 accumulator to the scale used by our
      // (16-bit, using 3 integer bits) fixed-point format. The quantized
      // multiplier and shift here have been pre-computed offline
      // (e.g. by toco).
      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
      // Saturate, cast to int16, and store to the temporary activations array.
      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
    }
  }

  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
  // and muls, all done in 16-bit fixed-point.
  for (int b = 0; b < outer_size; ++b)
  {
    for (int c = 0; c < output_depth; ++c)
    {
      // Define the fixed-point data types that we will use here. All use
      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
      // They only differ by the number of integral vs. fractional bits,
      // determining the range of values that they can represent.
      //
      // F0 uses 0 integer bits, range [-1, 1].
      // This is the return type of math functions such as tanh, logistic,
      // whose range is in [-1, 1].
      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
      // F3 uses 3 integer bits, range [-8, 8].
      // This is the range of the previous fully-connected node's output,
      // which is our input here.
      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
      // 2^StateIntegerBits]. It's used to represent the internal state, whose
      // number of integer bits is currently dictated by the model. See comment
      // on the StateIntegerBits template parameter above.
      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
      // The four gates are laid out contiguously along the last dimension of
      // activ_temp, in order: input, input-modulation, forget, output.
      // Implementation of input gate, using fixed-point logistic function.
      F3 input_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
      // Implementation of input modulation gate, using fixed-point tanh
      // function.
      F3 input_modulation_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
      // Implementation of forget gate, using fixed-point logistic function.
      F3 forget_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
      // Implementation of output gate, using fixed-point logistic function.
      F3 output_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
      // Implementation of internal multiplication nodes, still in fixed-point.
      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
      FS prev_state_times_forget_state = forget_gate_output * prev_state;
      // Implementation of internal addition node, saturating.
      FS new_state =
        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
                                prev_state_times_forget_state);
      // Implementation of last internal Tanh node, still in fixed-point.
      // Since a Tanh fixed-point implementation is specialized for a given
      // number or integer bits, and each specialization can have a substantial
      // code size, and we already used above a Tanh on an input with 3 integer
      // bits, and per the table in the above function comment there is no
      // significant accuracy to be lost by clamping to [-8, +8] for a
      // 3-integer-bits representation, let us just do that. This helps people
      // porting this to targets where code footprint must be minimized.
      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
      // Store the new internal state back to memory, as 16-bit integers.
      // Note: here we store the original value with StateIntegerBits, not
      // the rescaled 3-integer-bits value fed to tanh.
      output_state_data_int16[b * output_depth + c] = new_state.raw();
      // Down-scale the output activations to 8-bit integers, saturating,
      // and store back to memory.
      // F0 raw values carry 15 fractional bits; dropping 8 of them maps the
      // [-1, 1) range onto [-128, 128), then clamp and re-center at 128.
      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
      int16 clamped_output_activ =
        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
    }
  }
}
+
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+ ruy::profiler::ScopeLabel label("Split");
+ const int split_dimensions = input_shape.DimensionsCount();
+ int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+ int outputs_count = params.num_split;
+ TFLITE_DCHECK_LT(axis, split_dimensions);
+
+ int64_t split_size = 0;
+ for (int i = 0; i < outputs_count; i++)
+ {
+ TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+ for (int j = 0; j < split_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ MatchingDim(*output_shapes[i], j, input_shape, j);
+ }
+ }
+ split_size += output_shapes[i]->Dims(axis);
+ }
+ TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // For all output arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= input_shape.Dims(i);
+ }
+
+ const Scalar *input_ptr = input_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+ memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+ input_ptr += copy_size;
+ }
+ }
+}
+
// Flattens a (batch, row, col) coordinate into a linear node index for data
// laid out as [batches, height, width, ...], row-major.
inline int NodeOffset(int b, int h, int w, int height, int width)
{
  const int row_index = b * height + h;
  return row_index * width + w;
}
+
// Local response normalization over the innermost (depth) axis:
//   output[c] = input[c] * (bias + alpha * sum_{n in window} input[n]^2)^(-beta)
inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
                                       const RuntimeShape &input_shape, const float *input_data,
                                       const RuntimeShape &output_shape, float *output_data)
{
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i)
  {
    for (int c = 0; c < depth; ++c)
    {
      // Window is [c - range, c + range) clipped to [0, depth). NOTE(review):
      // the upper bound is exclusive, so channel c + range itself is NOT
      // summed — confirm this matches the intended radius semantics of
      // `op_params.range`.
      const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
      const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
      // Sum of squares over the window.
      float accum = 0.f;
      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
      {
        const float input_val = input_data[i * depth + input_c];
        accum += input_val * input_val;
      }
      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
    }
  }
}
+
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = static_cast<float>(input_data[i]);
+ }
+}
+
// Simulates num_bits-bit quantization of float data: nudges the [min, max]
// range so zero is exactly representable, then quantize-dequantizes every
// element in place via FakeQuantizeArray.
inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
{
  ruy::profiler::ScopeLabel label("FakeQuant");
  float rmin = op_params.minmax.min;
  float rmax = op_params.minmax.max;
  int num_bits = op_params.num_bits;
  // 0 should always be a representable value. Let's assume that the initial
  // min,max range contains 0.
  TFLITE_DCHECK_LE(rmin, 0.0f);
  TFLITE_DCHECK_GE(rmax, 0.0f);
  TFLITE_DCHECK_LT(rmin, rmax);

  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
  // Quantized grid spans [0, 2^num_bits - 1].
  int quant_min = 0;
  int quant_max = (1 << num_bits) - 1;
  float nudged_min, nudged_max, nudged_scale;
  NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
}
+
// Common subroutine for both `GatherNd` and `GatherNdString`.
struct GatherNdHelperResult
{
  int n_slices;   // number of coordinates to gather (product of all but the
                  // last indices dimension)
  int slice_size; // elements copied per gathered coordinate
  int indices_nd; // length of one coordinate (innermost indices dimension)
  std::vector<int> dims_to_count; // row-major strides for the first
                                  // `indices_nd` params dimensions
};
+
+// Returns common values being used on both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+ const RuntimeShape &indices_shape)
+{
+ GatherNdHelperResult ret;
+ ret.n_slices = 1;
+ ret.slice_size = 1;
+ const int indices_dims = indices_shape.DimensionsCount();
+ ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+ const int params_dims = params_shape.DimensionsCount();
+ for (int i = 0; i < indices_dims - 1; ++i)
+ {
+ ret.n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = ret.indices_nd; i < params_dims; ++i)
+ {
+ ret.slice_size *= params_shape.Dims(i);
+ }
+
+ int remain_flat_size = params_shape.FlatSize();
+ ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+ for (int i = 0; i < ret.indices_nd; ++i)
+ {
+ ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+ remain_flat_size = ret.dims_to_count[i];
+ }
+
+ return ret;
+}
+
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+ const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &output_shape, ParamsT *output_data)
+{
+ ruy::profiler::ScopeLabel label("GatherNd");
+
+ const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+ for (int i = 0; i < res.n_slices; ++i)
+ {
+ int from_pos = 0;
+ for (int j = 0; j < res.indices_nd; ++j)
+ {
+ from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+ }
+ std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+ sizeof(ParamsT) * res.slice_size);
+ }
+}
+
#ifndef TF_LITE_STATIC_MEMORY
// String flavor of GatherNd. Strings are variable-length, so instead of a
// memcpy per slice, gathered entries are appended to a DynamicBuffer and
// serialized into the output tensor at the end.
template <typename IndicesT = int32>
inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
{
  ruy::profiler::ScopeLabel label("GatherNdString");

  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
  DynamicBuffer buffer;
  for (int i = 0; i < res.n_slices; ++i)
  {
    // Flat element offset addressed by the i-th coordinate.
    int from_pos = 0;
    for (int j = 0; j < res.indices_nd; ++j)
    {
      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
    }
    for (int j = 0; j < res.slice_size; ++j)
    {
      buffer.AddString(GetString(params_data, from_pos + j));
    }
  }
  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
}
#endif
+
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+ const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+ ruy::profiler::ScopeLabel label("ScatterNd");
+
+ int n_slices = 1;
+ int slice_size = 1;
+ const int outer_dims = indices_shape.DimensionsCount() - 1;
+ const int indices_nd = indices_shape.Dims(outer_dims);
+ const int updates_dims = updates_shape.DimensionsCount();
+ for (int i = 0; i < outer_dims; ++i)
+ {
+ n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = outer_dims; i < updates_dims; ++i)
+ {
+ slice_size *= updates_shape.Dims(i);
+ }
+
+ int output_flat_size = output_shape.FlatSize();
+ int remain_flat_size = output_flat_size;
+ std::vector<int> dims_to_count(indices_nd, 0);
+ for (int i = 0; i < indices_nd; ++i)
+ {
+ dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+ remain_flat_size = dims_to_count[i];
+ }
+
+ memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+ for (int i = 0; i < n_slices; ++i)
+ {
+ int to_pos = 0;
+ for (int j = 0; j < indices_nd; ++j)
+ {
+ IndicesT idx = indices_data[i * indices_nd + j];
+ TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+ to_pos += idx * dims_to_count[j];
+ }
+ for (int j = 0; j < slice_size; j++)
+ {
+ output_data[to_pos + j] += updates_data[i * slice_size + j];
+ }
+ }
+}
+
// Generic 5-D slice: visits every input coordinate inside the begin/size
// window in row-major order and forwards its flat offset to `writer`.
template <typename T>
inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
{
  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
  TFLITE_DCHECK_LE(op_params.begin_count, 5);
  TFLITE_DCHECK_LE(op_params.size_count, 5);
  const int begin_count = op_params.begin_count;
  const int size_count = op_params.size_count;
  // We front-pad the begin and size vectors.
  std::array<int, 5> start;
  std::array<int, 5> stop;
  for (int i = 0; i < 5; ++i)
  {
    // Extended dimension i maps to user entry begin_count - (5 - i).
    // Dimensions with no entry (front padding) default to the full range,
    // and a size of -1 also means "to the end of that dimension".
    int padded_i = 5 - i;
    start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
    stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
                ? ext_shape.Dims(i)
                : start[i] + op_params.size[size_count - padded_i];
  }

  for (int i0 = start[0]; i0 < stop[0]; ++i0)
  {
    for (int i1 = start[1]; i1 < stop[1]; ++i1)
    {
      for (int i2 = start[2]; i2 < stop[2]; ++i2)
      {
        for (int i3 = start[3]; i3 < stop[3]; ++i3)
        {
          for (int i4 = start[4]; i4 < stop[4]; ++i4)
          {
            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
          }
        }
      }
    }
  }
}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ SequentialTensorWriter<T> writer(input_data, output_data);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+ SequentialTensorWriter<T> writer(input, output);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto min_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto max_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
// ArgMax in terms of the shared ArgMinMax implementation, selecting maxima
// via a greater-than comparator. `input2_data` is forwarded untouched to
// ArgMinMax (presumably the reduction axis — confirm at its definition).
template <typename T1, typename T2, typename T3>
void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
            const RuntimeShape &output_shape, T2 *output_data)
{
  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
}
+
// Convenience version that allows, for example, generated-code calls to be
// the same as other binary ops. `input2_shape` is accepted only for signature
// compatibility and is intentionally unused.
template <typename T1, typename T2, typename T3>
inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
                   const RuntimeShape &input2_shape, const T3 *input2_data,
                   const RuntimeShape &output_shape, T2 *output_data)
{
  // Drop shape of second input: not needed.
  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
}
+
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ int64_t flatsize;
+ // Allow select operator executions on mixed scalar tensors and one element
+ // tensors.
+ if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+ input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+ {
+ flatsize = 1;
+ }
+ else
+ {
+ flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+ }
+ for (int64_t i = 0; i < flatsize; ++i)
+ {
+ output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+ }
+}
+
// Select where the condition is (at most) rank-1 and chooses whole rows:
// condition element i picks row i of either x or y, copied wholesale.
template <typename D, typename T>
void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
                   const RuntimeShape &input_x_shape, const T *input_x_data,
                   const RuntimeShape &input_y_shape, const T *input_y_data,
                   const RuntimeShape &output_shape, T *output_data)
{
  const int64_t outer_size = input_condition_shape.FlatSize();
  int64_t inner_size;
  if (input_condition_shape.DimensionsCount() == 0)
  {
    // Scalar condition: the single element selects the entire tensor.
    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
  }
  else
  {
    // Rank-1 condition: its length must match the leading dimension of the
    // data tensors; each condition element selects one row.
    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
  }

  // Copy each selected row as one contiguous memcpy.
  int64_t offset = 0;
  for (int64_t i = 0; i < outer_size; i++)
  {
    const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
    offset += inner_size;
  }
}
+
// Select with NumPy-style broadcasting of condition, x and y, for tensors of
// rank <= 4. "Slow" because it walks every output coordinate individually.
template <typename D, typename T>
void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
                           const RuntimeShape &input_x_shape, const T *input_x_data,
                           const RuntimeShape &input_y_shape, const T *input_y_data,
                           const RuntimeShape &output_shape, T *output_data)
{
  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);

  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);

  // Per-input descriptors that map an output coordinate to each (possibly
  // smaller, broadcast) input's index.
  NdArrayDesc<4> desc_condition;
  NdArrayDesc<4> desc_x;
  NdArrayDesc<4> desc_y;
  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
                                      &desc_condition, &desc_x, &desc_y);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest
  // stride, typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for
  // the best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b)
  {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y)
    {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x)
      {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c)
        {
          const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
          output_data[Offset(extended_output_shape, b, y, x, c)] =
            input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
        }
      }
    }
  }
}
+
// Writes the multi-dimensional coordinates of every true element of the
// condition tensor, in row-major scan order. Output is laid out as
// [num_true, cond_rank]; the caller must size it for the number of true
// elements.
template <typename D, typename T>
void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
                      T *output_data)
{
  const size_t size = input_condition_shape.FlatSize();
  if (size == 0)
  {
    // Dimension is zero, in which case we don't need to output.
    return;
  }
  const size_t cond_rank = input_condition_shape.DimensionsCount();

  // Row-major strides of the condition shape, used to decompose a flat index
  // back into per-dimension coordinates.
  std::vector<int> dims_to_count(cond_rank, 0);
  int cur_flat_size = size;
  for (int i = 0; i < cond_rank; ++i)
  {
    dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
    cur_flat_size = dims_to_count[i];
  }

  int output_index = 0;
  for (int i = 0; i < size; ++i)
  {
    if (input_condition_data[i])
    {
      // Insert the coordinate of the current item (row major) into output.
      int flat_index = i;
      for (int j = 0; j < cond_rank; ++j)
      {
        int coord_j = flat_index / dims_to_count[j];
        output_data[output_index * cond_rank + j] = coord_j;
        flat_index %= dims_to_count[j];
      }
      output_index++;
    }
  }
}
+
+// For easy implementation, the indices is always a vector of size-4 vectors.
+// Scatters `values` into a dense output tensor: output is first filled with
+// default_value, then for each 4-D index in `indices` the corresponding value
+// is written. When value_is_scalar is true, the single value *values is
+// broadcast to every index. Later duplicate indices overwrite earlier ones.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+ T default_value, bool value_is_scalar,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+ const int value_count = indices.size();
+
+ // First fill the output_data with default value.
+ const int num_elements = output_shape.FlatSize();
+ for (int i = 0; i < num_elements; ++i)
+ {
+ output_data[i] = default_value;
+ }
+
+ // Special handle for value is scalar case to avoid checking the boolean
+ // condition within the loop every time.
+ if (value_is_scalar)
+ {
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = *values; // just use the first value.
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+ return;
+ }
+
+ // Go through the values and indices to fill the sparse values.
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = values[i];
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+}
+
+// Elementwise power: output[i] = input1[i] ** input2[i]. All three shapes
+// must have matching flat sizes (no broadcasting — see BroadcastPow4DSlow
+// below for the broadcasting variant).
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+ const RuntimeShape &input2_shape, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::pow(input1_data[i], input2_data[i]);
+ }
+}
+
+// Elementwise power with numpy-style broadcasting over up-to-4-D inputs.
+// "Slow" because it recomputes broadcast indices per element via
+// SubscriptToIndex instead of using a fused fast path.
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const RuntimeShape &unextended_input2_shape, const T *input2_data,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ // Descriptors map 4-D output subscripts back to (possibly broadcast)
+ // positions in each input.
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::pow(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+// Reverses the input tensor along a single axis. The tensor is viewed as
+// (outer_size, dims_at_axis, copy_size); each contiguous copy_size slab at
+// axis position j is copied to position (dims_at_axis - j - 1).
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Reverse");
+
+ // Product of dimensions before the axis.
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ // Product of dimensions after the axis; this is the size of each
+ // contiguous run that is moved as a unit.
+ int copy_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_axis = input_shape.Dims(axis);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_axis; ++j)
+ {
+ const int start_pos = (i * dims_at_axis + j) * copy_size;
+ Scalar *output_ptr = output_data + start_pos;
+ // Mirrored source position within the same outer slice.
+ int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+// Reverses variable-length slices along seq_dim, where seq_lengths (indexed
+// by position along batch_dim) gives the prefix length to reverse for each
+// batch entry; elements beyond that length are copied through unchanged.
+// The two branches below handle the two possible orderings of batch_dim and
+// seq_dim; the tensor is viewed as
+// (outer_size, dims_at_outer_dim, medium_size, dims_at_medium_dim, copy_size).
+// NOTE(review): the batch_dim == seq_dim case falls through with no output
+// written — presumably rejected earlier by the kernel's Prepare; confirm.
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ // Product of dimensions before min(batch_dim, seq_dim).
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ // Product of dimensions strictly between the two dims of interest.
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ // Product of dimensions after max(batch_dim, seq_dim): contiguous run size.
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ // seq_dim is the outer of the two: j iterates sequence positions and
+ // q iterates batch entries, so seq_lengths is indexed by q.
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ // Past this batch entry's sequence length: copy through.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Within the reversed prefix: mirror j to (sl - j).
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ // batch_dim is the outer of the two: j iterates batch entries (so
+ // seq_lengths is indexed by j) and q iterates sequence positions.
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ // Past the sequence length: copy through unchanged.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Within the reversed prefix: mirror q to (sl - q).
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+// Sums rows of the input (along dimension 0) into output rows selected by
+// segment_ids_data: output[segment_ids[i]] += input[i]. Output is zeroed
+// first, so segments with no members produce all-zero rows.
+// NOTE(review): segment ids are used unchecked — values outside
+// [0, output_shape.Dims(0)) would index out of bounds; presumably validated
+// by the calling kernel. Confirm.
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ // Elements per row (all dimensions except dim 0).
+ const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+ memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+ for (int i = 0; i < input_shape.Dims(0); i++)
+ {
+ int output_index = segment_ids_data[i];
+ for (int j = 0; j < segment_flat_size; ++j)
+ {
+ output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
index 428b15ee0..1e6c41ecc 100644
--- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -13,6 +13,7 @@ REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -48,6 +49,7 @@ REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(Pow)
REGISTER_KERNEL(PRelu)
REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(ReduceMax)
REGISTER_KERNEL(Relu)
REGISTER_KERNEL(Relu6)
REGISTER_KERNEL(Reshape)
@@ -55,6 +57,7 @@ REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
REGISTER_KERNEL(ReverseV2)
REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
REGISTER_KERNEL(Slice)
REGISTER_KERNEL(Softmax)
REGISTER_KERNEL(SpaceToBatchND)
diff --git a/compiler/luci-interpreter/pal/linux/PALreference_ops.h b/compiler/luci-interpreter/pal/linux/PALreference_ops.h
new file mode 100644
index 000000000..825ebfe8e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALreference_ops.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
index d134a6b95..f0df58db3 100644
--- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -12,6 +12,7 @@ REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
REGISTER_KERNEL(Softmax)
REGISTER_KERNEL(SpaceToBatchND)
REGISTER_KERNEL(SpaceToDepth)
diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
index 15ff0327b..efa6b167e 100644
--- a/compiler/luci-interpreter/pal/mcu/PALDequantize.h
+++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
@@ -18,7 +18,7 @@
#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
index 6046789ae..effb85d54 100644
--- a/compiler/luci-interpreter/pal/mcu/PALQuantize.h
+++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
@@ -17,7 +17,7 @@
#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/mcu/PALreference_ops.h b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h
new file mode 100644
index 000000000..62c720937
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+// Elementwise ReLU: output[i] = max(input[i], 0).
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T lower = 0;
+ const T clamped = val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+// Elementwise ReLU1: clamps each element to the range [-1, 1].
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T upper = 1;
+ const T lower = -1;
+ const T clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+// Elementwise ReLU6 (float only): clamps each element to the range [0, 6].
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const float upper = 6;
+ const float lower = 0;
+ const float clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+// Quantized ReLU-family activation: requantizes each element from the input
+// scale/offset to the output scale/offset, then clamps to the quantized
+// activation range in params.
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const int32 val = static_cast<int32_t>(input_data[i]);
+ // Rescale into the output quantization domain before clamping.
+ int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+ params.output_multiplier,
+ params.output_shift);
+ clamped = std::max(params.quantized_activation_min, clamped);
+ clamped = std::min(params.quantized_activation_max, clamped);
+ output_data[i] = static_cast<T>(clamped);
+ }
+}
+
+// Quantized ReLU-family activation, same-scale variant: input and output
+// share a quantization, so this only clamps each element to
+// [quantized_activation_min, quantized_activation_max] — no requantization.
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ const T max_value = params.quantized_activation_max;
+ const T min_value = params.quantized_activation_min;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+ output_data[i] = clamped;
+ }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// Quantized uint8 broadcast multiply using the precomputed "fivefold" shape
+// decomposition in params.broadcast_shape. If the second input is the one
+// that broadcasts fast, the operands (and their offsets) are switched first
+// so the loop structure below only handles one orientation.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+ const RuntimeShape &unswitched_input1_shape,
+ const uint8 *unswitched_input1_data,
+ const RuntimeShape &unswitched_input2_shape,
+ const uint8 *unswitched_input2_data,
+ const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ArithmeticParams switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+
+ const bool use_unswitched = unswitched_params.broadcast_category ==
+ tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+ const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+ // sections of the arrays.
+ uint8 *output_data_ptr = output_data;
+ const uint8 *input1_data_ptr = input1_data;
+ const uint8 *input2_data_reset = input2_data;
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const uint8 *input2_data_ptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ // Elementwise multiply of two y4-length runs.
+ MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ input1_data_ptr += y4;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+}
+
+// Elementwise multiply of two Q0.15 fixed-point (int16) tensors; the product
+// is kept in Q0.15 with gemmlowp saturating arithmetic.
+// NOTE(review): params is unused here — no activation clamping is applied in
+// this int16*int16->int16 variant.
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16");
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ output_data[i] = unclamped_result.raw();
+ }
+}
+
+// Elementwise multiply of two Q0.15 fixed-point (int16) tensors producing a
+// uint8 output: the Q0.15 product is rescaled (rounding right-shift by 8),
+// clamped to the quantized activation range, and offset into uint8 space.
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+ int32 output_offset = params.output_offset;
+ int32 output_activation_min = params.quantized_activation_min;
+ int32 output_activation_max = params.quantized_activation_max;
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ // Rounding divide by 2^8 maps the Q0.15 product toward uint8 range.
+ int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+ // Clamp in offset-free space, then re-apply the output zero point.
+ int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+ clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+ output_data[i] = output_offset + clamped_result;
+ }
+}
+
+// Q0.15 fixed-point (int16) subtraction where exactly one operand may carry a
+// (negative) left shift: the shifted operand is first scaled down by
+// input_right_shift, then subtracted with saturation and clamped to the
+// quantized activation range. The two branches preserve operand order:
+// the result is always input1 - input2.
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16_t *input1_data, const RuntimeShape &input2_shape,
+ const int16_t *input2_data, const RuntimeShape &output_shape,
+ int16_t *output_data)
+{
+ ruy::profiler::ScopeLabel label("Sub/Int16");
+ const int input1_shift = params.input1_shift;
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ const int16 output_activation_min = params.quantized_activation_min;
+ const int16 output_activation_max = params.quantized_activation_max;
+
+ // Exactly one of the two shifts may be non-zero, and shifts are <= 0
+ // (i.e. right shifts only).
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+ TFLITE_DCHECK_LE(input1_shift, 0);
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
+ const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+ const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+ const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+ if (input1_shift == 0)
+ {
+ // input2 is the shifted operand: result = input1 - scaled(input2).
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+ else
+ {
+ // input1 is the shifted operand: result = scaled(input1) - input2.
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+}
+
+// Stacks inputs_count same-shaped input tensors along params.axis of the
+// output. Each input is viewed as (outer_size, copy_size); slab k of input i
+// lands at output slot (k * inputs_count + i).
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Pack");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ // Product of output dimensions before the axis.
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ // Product of output dimensions after the axis: contiguous run size.
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ // All inputs share a shape; checking the first is sufficient.
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+// Inverse of Pack: splits the input along params.axis into num_split
+// same-shaped output tensors. Slot (k * outputs_count + i) of the input is
+// copied to slab k of output i. Accepts a negative axis (Python-style).
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+ ruy::profiler::ScopeLabel label("Unpack");
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
+ int outer_size = 1;
+ int axis = params.axis;
+ // Normalize a negative axis to its positive equivalent.
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, dimensions);
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // Product of input dimensions after the axis: contiguous run size.
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+ // All outputs share a shape; output_shape describes each of them.
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ Scalar *output_ptr = output_datas[i] + copy_size * k;
+ int loc = k * outputs_count * copy_size + i * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+// Quantized (uint8) Pack with per-input requantization: inputs whose
+// zero point and scale already match the output are memcpy'd; otherwise each
+// element is rescaled into the output quantization (that slow path currently
+// asserts, so callers are expected to pre-match quantizations).
+// NOTE(review): the template parameter Scalar is only used for the local
+// output_ptr, while all data pointers are uint8 — this only compiles when
+// instantiated with Scalar = uint8. Mirrors upstream TFLite; confirm before
+// changing.
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const uint8 *const *input_data, const RuntimeShape &output_shape,
+ uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("PackWithScaling");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32 *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ // Product of output dimensions before the axis.
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ // Product of output dimensions after the axis: contiguous run size.
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ Scalar *output_ptr = output_data;
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ // Quantizations match: straight copy.
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ }
+ else
+ {
+ // Requantization path — currently unreachable by design (assert).
+ assert(false);
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ auto input_ptr = input_data[i];
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+// Concatenates inputs along the depth (last, NHWC channel) dimension by
+// delegating to Concatenation with axis forced to 3; all other params are
+// passed through unchanged.
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape,
+ Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("DepthConcatenation");
+ auto params_copy = params;
+ params_copy.axis = 3;
+ Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+ const float *prev_activ_data, const RuntimeShape &weights_shape,
+ const float *weights_data, const RuntimeShape &unextended_bias_shape,
+ const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+ const float *prev_state_data,
+ const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+ const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+ const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+ const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+ output_state_shape, 0, output_activ_shape, 0);
+ const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+ output_state_shape, 1, output_activ_shape, 1);
+ const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+ output_state_shape, 2, output_activ_shape, 2);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+ // Concatenate prev_activ and input data together
+ std::vector<float const *> concat_input_arrays_data;
+ std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+ concat_input_arrays_data.push_back(input_data);
+ concat_input_arrays_data.push_back(prev_activ_data);
+ concat_input_arrays_shapes.push_back(&input_shape);
+ concat_input_arrays_shapes.push_back(&prev_activ_shape);
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = concat_input_arrays_data.size();
+ Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+ concat_temp_shape, concat_temp_data);
+
+ // Fully connected
+ tflite::FullyConnectedParams fc_params;
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+ bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+ // Memory state update (the LSTM "guts")
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int w = 0; w < width; ++w)
+ {
+ for (int h = 0; h < height; ++h)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ const float input_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+ const float new_input =
+ std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+ const float forget_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+ const float output_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+ const float new_state =
+ input_gate * new_input +
+ forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+ output_gate * std::tanh(new_state);
+ }
+ }
+ }
+ }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that it is the natural interval
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is obviously quantized as
+// the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+// Quantized LSTM cell step: depth-concatenate {input, prev_activ}, run the
+// 8-bit fully-connected node, then evaluate the four gates in 16-bit
+// fixed-point arithmetic (gemmlowp). The fixed-point formats used here are
+// justified in the long quantization-scheme comment above.
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+         int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+  (void)gemmlowp_context; // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  // All shapes are extended to rank 4 so that the Offset()/Dims(3) indexing
+  // below is uniform regardless of the caller-supplied rank.
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+                                                 output_state_shape, output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  // The fully-connected node produces four gate vectors, hence its depth
+  // must be exactly 4x the output depth.
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate prev_activ and input data together.
+  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b)
+  {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+    {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d)
+      {
+        // Inputs use a fixed zero point of 128 (see the [-1, 127/128]
+        // quantization above); weights use the model-provided zero point.
+        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b)
+  {
+    for (int c = 0; c < output_depth; ++c)
+    {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state =
+        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+                                prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number or integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory. Dividing the raw F0 value (15 fractional
+      // bits) by 2^8 maps [-1, 1) onto [-128, 128), which is then clamped
+      // and re-centered around the uint8 zero point of 128.
+      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16 clamped_output_activ =
+        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+    }
+  }
+}
+
+// Splits 'input_data' into 'params.num_split' outputs along 'params.axis'
+// (a negative axis counts from the back). All non-axis dimensions of every
+// output must match the input, and the per-output axis extents must sum to
+// the input's axis extent. Data is copied as contiguous inner slices.
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+           const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+  ruy::profiler::ScopeLabel label("Split");
+  const int split_dimensions = input_shape.DimensionsCount();
+  // Normalize a negative axis to its positive equivalent.
+  int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  int outputs_count = params.num_split;
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+
+  // Validate output shapes against the input and total the axis extents.
+  int64_t split_size = 0;
+  for (int i = 0; i < outputs_count; i++)
+  {
+    TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+    for (int j = 0; j < split_dimensions; j++)
+    {
+      if (j != axis)
+      {
+        MatchingDim(*output_shapes[i], j, input_shape, j);
+      }
+    }
+    split_size += output_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  // Walk the input once; for each outer index, hand each output its
+  // contiguous chunk of (axis extent * inner size) elements.
+  const Scalar *input_ptr = input_data;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+// Flat index of element (b, h, w) in a [batches, height, width] layout.
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  return (b * height + h) * width + w;
+}
+
+// Local Response Normalization over the innermost (channel) dimension:
+// each output value is the input scaled by
+// (bias + alpha * sum-of-squares over a channel window) ^ (-beta).
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                                       const RuntimeShape &input_shape, const float *input_data,
+                                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int c = 0; c < depth; ++c)
+    {
+      // Channel window is [max(0, c - range), min(depth, c + range));
+      // note the upper bound is exclusive as written.
+      const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
+      const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
+      float accum = 0.f;
+      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+      {
+        const float input_val = input_data[i * depth + input_c];
+        accum += input_val * input_val;
+      }
+      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+    }
+  }
+}
+
+// Element-wise widening of half-precision (fp16) values to float32.
+// Despite the name, this overload applies no scale/zero-point: it is a
+// plain per-element cast.
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = static_cast<float>(input_data[i]);
+  }
+}
+
+// Fake-quantizes 'input_data': nudges the [min, max] range so that zero is
+// exactly representable with 'num_bits' levels, then rounds every value
+// onto that quantized grid. The output remains float.
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("FakeQuant");
+  float rmin = op_params.minmax.min;
+  float rmax = op_params.minmax.max;
+  int num_bits = op_params.num_bits;
+  // 0 should always be a representable value. Let's assume that the initial
+  // min,max range contains 0.
+  TFLITE_DCHECK_LE(rmin, 0.0f);
+  TFLITE_DCHECK_GE(rmax, 0.0f);
+  TFLITE_DCHECK_LT(rmin, rmax);
+
+  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
+  int quant_min = 0;
+  int quant_max = (1 << num_bits) - 1;
+  float nudged_min, nudged_max, nudged_scale;
+  NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
+}
+
+// Common subroutine for both `GatherNd` and `GatherNdString`.
+struct GatherNdHelperResult
+{
+  int n_slices;   // number of slices to gather (product of leading index dims)
+  int slice_size; // elements per gathered slice (product of trailing params dims)
+  int indices_nd; // length of one index tuple (last dim of the indices tensor)
+  std::vector<int> dims_to_count; // flat stride of each indexed params dimension
+};
+
+// Returns common values being used on both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+                                           const RuntimeShape &indices_shape)
+{
+  GatherNdHelperResult ret;
+  ret.n_slices = 1;
+  ret.slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  // The last indices dimension holds one multi-dimensional index per slice.
+  ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  // One slice per index tuple (product of all leading indices dims)...
+  for (int i = 0; i < indices_dims - 1; ++i)
+  {
+    ret.n_slices *= indices_shape.Dims(i);
+  }
+  // ...and each slice spans the params dimensions not consumed by the index.
+  for (int i = ret.indices_nd; i < params_dims; ++i)
+  {
+    ret.slice_size *= params_shape.Dims(i);
+  }
+
+  // Flat strides of the indexed params dimensions, computed outermost-first
+  // by repeatedly dividing out each dimension's extent.
+  int remain_flat_size = params_shape.FlatSize();
+  ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+  for (int i = 0; i < ret.indices_nd; ++i)
+  {
+    ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+    remain_flat_size = ret.dims_to_count[i];
+  }
+
+  return ret;
+}
+
+// Gathers the slices of 'params_data' addressed by the index tuples in
+// 'indices_data', packing them contiguously into 'output_data'.
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+                     const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                     const RuntimeShape &output_shape, ParamsT *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNd");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    // Resolve the i-th index tuple into a flat source offset.
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+                sizeof(ParamsT) * res.slice_size);
+  }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+// String-tensor flavor of GatherNd: gathered strings are accumulated into a
+// DynamicBuffer and written back to the output tensor in one shot (hence
+// unavailable in static-memory builds).
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNdString");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  DynamicBuffer buffer;
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    // Resolve the i-th index tuple into a flat source offset.
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    for (int j = 0; j < res.slice_size; ++j)
+    {
+      buffer.AddString(GetString(params_data, from_pos + j));
+    }
+  }
+  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+// Scatters 'updates_data' slices into a zero-initialized output at the
+// positions given by 'indices_data'. Duplicate indices accumulate (+=)
+// rather than overwrite.
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                      const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+                      const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+  ruy::profiler::ScopeLabel label("ScatterNd");
+
+  int n_slices = 1;
+  int slice_size = 1;
+  const int outer_dims = indices_shape.DimensionsCount() - 1;
+  // The last indices dimension holds one multi-dimensional index per slice.
+  const int indices_nd = indices_shape.Dims(outer_dims);
+  const int updates_dims = updates_shape.DimensionsCount();
+  for (int i = 0; i < outer_dims; ++i)
+  {
+    n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = outer_dims; i < updates_dims; ++i)
+  {
+    slice_size *= updates_shape.Dims(i);
+  }
+
+  // Flat strides of the indexed output dimensions, computed outermost-first.
+  int output_flat_size = output_shape.FlatSize();
+  int remain_flat_size = output_flat_size;
+  std::vector<int> dims_to_count(indices_nd, 0);
+  for (int i = 0; i < indices_nd; ++i)
+  {
+    dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+    remain_flat_size = dims_to_count[i];
+  }
+
+  // Zero-fill, then accumulate each update slice at its target offset.
+  memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+  for (int i = 0; i < n_slices; ++i)
+  {
+    int to_pos = 0;
+    for (int j = 0; j < indices_nd; ++j)
+    {
+      IndicesT idx = indices_data[i * indices_nd + j];
+      TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+      to_pos += idx * dims_to_count[j];
+    }
+    for (int j = 0; j < slice_size; j++)
+    {
+      output_data[to_pos + j] += updates_data[i * slice_size + j];
+    }
+  }
+}
+
+// Core Slice implementation: the input shape is front-padded to rank 5,
+// then the selected [start, stop) window is streamed through 'writer' in
+// row-major order. A size entry of -1 means "to the end of that dimension".
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+  // We front-pad the begin and size vectors.
+  std::array<int, 5> start;
+  std::array<int, 5> stop;
+  for (int i = 0; i < 5; ++i)
+  {
+    // Dimensions not covered by begin/size take their full extent.
+    int padded_i = 5 - i;
+    start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+    stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+                ? ext_shape.Dims(i)
+                : start[i] + op_params.size[size_count - padded_i];
+  }
+
+  for (int i0 = start[0]; i0 < stop[0]; ++i0)
+  {
+    for (int i1 = start[1]; i1 < stop[1]; ++i1)
+    {
+      for (int i2 = start[2]; i2 < stop[2]; ++i2)
+      {
+        for (int i3 = start[3]; i3 < stop[3]; ++i3)
+        {
+          for (int i4 = start[4]; i4 < stop[4]; ++i4)
+          {
+            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
+          }
+        }
+      }
+    }
+  }
+}
+
+// Convenience overload slicing directly from 'input_data' into 'output_data'.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  SequentialTensorWriter<T> writer(input_data, output_data);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+// Convenience overload slicing between TfLiteTensor objects (used e.g. for
+// string tensors, where a tensor-aware writer is required).
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+  SequentialTensorWriter<T> writer(input, output);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+// Element-wise minimum against a scalar: only input2_data[0] is read, and
+// each output element is min(input1[i], that scalar).
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto min_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops. The second input is treated as a scalar, so
+// its shape carries no information here.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise maximum against a scalar: only input2_data[0] is read, and
+// each output element is max(input1[i], that scalar).
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto max_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops. The second input is treated as a scalar, so
+// its shape carries no information here.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// ArgMax implemented by delegating to the shared ArgMinMax with a
+// std::greater comparator. input2_data is forwarded unchanged (presumably
+// the axis tensor — see ArgMinMax for its exact semantics).
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+            const RuntimeShape &output_shape, T2 *output_data)
+{
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops. The second input's shape is ignored.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+                   const RuntimeShape &input2_shape, const T3 *input2_data,
+                   const RuntimeShape &output_shape, T2 *output_data)
+{
+  // Drop shape of second input: not needed.
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise select: output[i] = condition[i] ? x[i] : y[i]. All four
+// shapes must match, except for the special case where every tensor has
+// exactly one element (mixed scalar / one-element tensors are allowed).
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+            const RuntimeShape &input_x_shape, const T *input_x_data,
+            const RuntimeShape &input_y_shape, const T *input_y_data,
+            const RuntimeShape &output_shape, T *output_data)
+{
+  int64_t flatsize;
+  // Allow select operator executions on mixed scalar tensors and one element
+  // tensors.
+  if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+  {
+    flatsize = 1;
+  }
+  else
+  {
+    flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+  }
+  for (int64_t i = 0; i < flatsize; ++i)
+  {
+    output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+  }
+}
+
+// Select where the condition has at most rank 1: each condition element
+// chooses an entire inner slice from either x or y, copied with memcpy.
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                   const RuntimeShape &input_x_shape, const T *input_x_data,
+                   const RuntimeShape &input_y_shape, const T *input_y_data,
+                   const RuntimeShape &output_shape, T *output_data)
+{
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  int64_t inner_size;
+  if (input_condition_shape.DimensionsCount() == 0)
+  {
+    // Rank-0 condition: its single value selects the whole tensor.
+    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  }
+  else
+  {
+    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+
+  int64_t offset = 0;
+  for (int64_t i = 0; i < outer_size; i++)
+  {
+    const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
+    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
+    offset += inner_size;
+  }
+}
+
+// Broadcasting select on up-to-rank-4 tensors: condition, x, and y are
+// broadcast against each other element-by-element, and each output element
+// picks from x or y according to the broadcast condition value.
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                           const RuntimeShape &input_x_shape, const T *input_x_data,
+                           const RuntimeShape &input_y_shape, const T *input_y_data,
+                           const RuntimeShape &output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+  NdArrayDesc<4> desc_condition;
+  NdArrayDesc<4> desc_x;
+  NdArrayDesc<4> desc_y;
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+                                      &desc_condition, &desc_x, &desc_y);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for
+  // the best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+  {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+    {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+      {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+        {
+          const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
+          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
+          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+            input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+        }
+      }
+    }
+  }
+}
+
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ T *output_data)
+{
+ const size_t size = input_condition_shape.FlatSize();
+ if (size == 0)
+ {
+ // Dimension is zero, in which case we don't need to output.
+ return;
+ }
+ const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+ std::vector<int> dims_to_count(cond_rank, 0);
+ int cur_flat_size = size;
+ for (int i = 0; i < cond_rank; ++i)
+ {
+ dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+ cur_flat_size = dims_to_count[i];
+ }
+
+ int output_index = 0;
+ for (int i = 0; i < size; ++i)
+ {
+ if (input_condition_data[i])
+ {
+ // Insert the coordinate of the current item (row major) into output.
+ int flat_index = i;
+ for (int j = 0; j < cond_rank; ++j)
+ {
+ int coord_j = flat_index / dims_to_count[j];
+ output_data[output_index * cond_rank + j] = coord_j;
+ flat_index %= dims_to_count[j];
+ }
+ output_index++;
+ }
+ }
+}
+
+// For easy implementation, the indices is always a vector of size-4 vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+ T default_value, bool value_is_scalar,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+ const int value_count = indices.size();
+
+ // First fill the output_data with default value.
+ const int num_elements = output_shape.FlatSize();
+ for (int i = 0; i < num_elements; ++i)
+ {
+ output_data[i] = default_value;
+ }
+
+ // Special handle for value is scalar case to avoid checking the boolean
+ // condition within the loop every time.
+ if (value_is_scalar)
+ {
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = *values; // just use the first value.
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+ return;
+ }
+
+ // Go through the values and indices to fill the sparse values.
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = values[i];
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+}
+
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+ const RuntimeShape &input2_shape, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::pow(input1_data[i], input2_data[i]);
+ }
+}
+
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const RuntimeShape &unextended_input2_shape, const T *input2_data,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::pow(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Reverse");
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_axis = input_shape.Dims(axis);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_axis; ++j)
+ {
+ const int start_pos = (i * dims_at_axis + j) * copy_size;
+ Scalar *output_ptr = output_data + start_pos;
+ int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+ memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+ for (int i = 0; i < input_shape.Dims(0); i++)
+ {
+ int output_index = segment_ids_data[i];
+ for (int j = 0; j < segment_flat_size; ++j)
+ {
+ output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
index 958fd4b74..6c0220c62 100644
--- a/compiler/luci-interpreter/src/core/KernelParams.h
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -170,6 +170,11 @@ struct ResizeNearestNeighborParams
bool half_pixel_centers;
};
+struct ShapeParams
+{
+ loco::DataType out_type;
+};
+
struct SubParams
{
Activation activation;
diff --git a/compiler/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..e09d6331a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+ : Kernel({dims, value}, {output})
+{
+}
+
+template <typename T> void Fill::configureShape()
+{
+ const auto dims_data = getTensorData<T>(dims());
+ Shape output_shape(dims()->shape().dim(0));
+
+ for (int i = 0; i < output_shape.num_dims(); ++i)
+ {
+ T data = dims_data[i];
+ if (data < 0)
+ throw std::runtime_error("Fill dimensions must be >= 0");
+
+ output_shape.dim(i) = data;
+ }
+
+ output()->resize(output_shape);
+}
+
+void Fill::configure()
+{
+ const auto dims_shape = dims()->shape();
+ const auto value_shape = value()->shape();
+
+ // Make sure the 1st input tensor is 1-D
+ LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+ // Make sure the 1st input tensor is int32 or int64
+ LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+ dims()->element_type() == DataType::S64);
+
+ // Make sure the 2nd input tensor is a scalar
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+ // Check zero point and scale for S16 and S8
+ if (value()->element_type() == loco::DataType::S16 or
+ value()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+ if (value()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+ }
+ // Resize output
+ switch (dims()->element_type())
+ {
+ case DataType::S32:
+ configureShape<int32_t>();
+ break;
+ case DataType::S64:
+ configureShape<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Fill::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::S8:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ case DataType::S16:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+ getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::S64:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+ getTensorShape(output()), getTensorData<int64_t>(output()));
+ break;
+ case DataType::FLOAT32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Fill.h b/compiler/luci-interpreter/src/kernels/Fill.h
new file mode 100644
index 000000000..184f0cb83
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FILL_H
+#define LUCI_INTERPRETER_KERNELS_FILL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Fill : public Kernel
+{
+public:
+ Fill(const Tensor *dims, const Tensor *value, Tensor *output);
+
+ const Tensor *dims() const { return _inputs[0]; }
+ const Tensor *value() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void configureShape();
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FILL_H
diff --git a/compiler/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..cf56df507
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FillTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<T> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<float> value_data = {5};
+
+ int32_t zero_point = 0;
+
+ if (DT == loco::DataType::S8)
+ zero_point = 1;
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point,
+ value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, FillInt)
+{
+ // Run for int32_t input
+ runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get());
+ // Run for int64_t input
+ runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get());
+ // Run for int8_t input
+ runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get());
+ // Run for int16_t input
+ runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(FillTest, FillFloat)
+{
+ Shape dims_shape{3};
+
+ std::vector<int64_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5};
+
+ std::vector<int32_t> ref_output_shape{2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, Invalid_Input_Shape_NEG)
+{
+ Shape dims_shape{1, 3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FillTest, Invalid_Value_Shape_NEG)
+{
+ Shape dims_shape{3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
index 2fbeefce4..bae1eac70 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -19,6 +19,8 @@
#include "kernels/Utils.h"
+#include <limits>
+
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-interpreter/src/kernels/Pack.cpp
index 6fee93890..42aab330c 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.cpp
@@ -76,9 +76,8 @@ void Pack::configure()
}
}
- if (t0->element_type() == DataType::S32 || t0->element_type() == DataType::U8 ||
- t0->element_type() == DataType::S8 || t0->element_type() == DataType::S16 ||
- t0->element_type() == DataType::S64)
+ if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+ t0->element_type() == DataType::S16)
{
LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
index 2404e4303..d16320b78 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -38,18 +38,26 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
std::vector<Tensor> tmp_inputs;
for (int i = 0; i < input_datas.size(); i++)
{
- if (std::is_same<T, float>::value)
+ if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, int64_t>::value)
{
tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
memory_manager->allocate_memory(tmp_inputs[i]);
tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
}
- else
+ else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
{
tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
memory_manager->allocate_memory(tmp_inputs[i]);
tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
}
+ else
+ {
+ assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested");
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
}
for (int i = 0; i < input_datas.size(); i++)
{
@@ -57,10 +65,14 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
}
Tensor output_tensor = makeOutputTensor(element_type);
- if (!std::is_same<T, float>::value)
+ if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
{
output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
}
+ else if (std::is_same<T, int16_t>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f, 0);
+ }
PackParams params{};
params.axis = axis;
@@ -79,7 +91,7 @@ template <typename T> class PackTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<uint8_t, float>;
+using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>;
TYPED_TEST_SUITE(PackTest, DataTypes);
TYPED_TEST(PackTest, ThreeInputs)
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
index fe172884b..c07f6e310 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -20,6 +20,8 @@
#include <tensorflow/lite/kernels/internal/reference/pad.h>
+#include <limits>
+
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp
index e90469239..197cdaa69 100644
--- a/compiler/luci-interpreter/src/kernels/PadV2.cpp
+++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp
@@ -20,6 +20,8 @@
#include <tensorflow/lite/kernels/internal/reference/pad.h>
+#include <limits>
+
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp
new file mode 100644
index 000000000..d58cd1563
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceMax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // This checks for duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+ReduceMax::ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+void ReduceMax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+ // We compute shapes of outputs in configure, assuming that outputs have
+ // static shape
+ // TODO Support dynamic shape
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+}
+
+void ReduceMax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ // TODO Support quantized kernels
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void ReduceMax::evalFloat() const
+{
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ int num_resolved_axis = 0;
+ LUCI_INTERPRETER_CHECK(
+ tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+ getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+ float init_value = std::numeric_limits<float>::lowest();
+ tflite::reference_ops::ReduceGeneric<float>(
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+ output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+ [](const float current, const float in) -> float { return (in > current) ? in : current; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.h b/compiler/luci-interpreter/src/kernels/ReduceMax.h
new file mode 100644
index 000000000..25a66278a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
+#define LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReduceMax : public KernelWithParams<ReducerParams>
+{
+public:
+ ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp
new file mode 100644
index 000000000..ab688827b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceMax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReduceMaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReduceMaxTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{23, 24};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceMaxTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{20, 22, 24};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..0429fe1e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+ : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+ output()->element_type() == DataType::S64);
+ const auto input_shape = input()->shape();
+
+ Shape output_shape(1);
+ output_shape.dim(0) = input_shape.num_dims();
+
+ output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+ switch (params().out_type)
+ {
+ case DataType::S32:
+ evalInt<int32_t>();
+ break;
+ case DataType::S64:
+ evalInt<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
+ const auto input_shape = input()->shape();
+
+ auto output_data = getTensorData<T>(output());
+
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ {
+ output_data[i] = input_shape.dim(i);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Shape.h b/compiler/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 000000000..cfaadec91
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H
+#define LUCI_INTERPRETER_KERNELS_SHAPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ShapeKernel : public KernelWithParams<ShapeParams>
+{
+public:
+ ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalInt() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H
diff --git a/compiler/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..4763e016c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager)
+{
+ Shape input_shape{1, 3, 1, 3, 5};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(dataType);
+
+ ShapeParams params{};
+ params.out_type = dataType;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{1, 3, 1, 3, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{5};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ShapeTest, OutTypeInt)
+{
+
+ // Run for int32_t output
+ runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get());
+ // Run for int64_t output
+ runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(ShapeTest, Invalid_Output_Type_NEG)
+{
+ Shape input_shape{1, 3};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ ShapeParams params{};
+ params.out_type = loco::DataType::FLOAT32;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp
index 281988272..aa6820889 100644
--- a/compiler/luci-interpreter/src/kernels/SplitV.cpp
+++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp
@@ -43,14 +43,36 @@ void SplitV::configure()
auto sizes_data = getTensorData<int32_t>(size_splits());
assert(size_splits()->shape().num_dims() == 1);
+
+ int32_t sum = 0;
+ const auto num_dims_size_spits = size_splits()->shape().dim(0);
+ int32_t count_neg_dim = 0;
+
+ for (int32_t i = 0; i < num_dims_size_spits - 1; ++i)
+ {
+ if (sizes_data[i] != -1)
+ {
+ sum += sizes_data[i];
+ }
+ else
+ {
+ count_neg_dim++;
+ }
+ }
+ assert(count_neg_dim < 2);
assert(size_splits()->shape().num_elements() == num_split);
- assert(std::accumulate(sizes_data, sizes_data + num_split, 0) ==
- input()->shape().dim(_axis_value));
auto output_shape = input()->shape();
for (int32_t i = 0; i < num_split; ++i)
{
- output_shape.dim(_axis_value) = sizes_data[i];
+ if (sizes_data[i] == -1)
+ {
+ output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+ }
+ else
+ {
+ output_shape.dim(_axis_value) = sizes_data[i];
+ }
_outputs[i]->resize(output_shape);
}
}
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
index c6452cdb0..a8730d861 100644
--- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -136,6 +136,11 @@ void StridedSlice::execute() const
getTensorData<uint8_t>(input()), getTensorShape(output()),
getTensorData<uint8_t>(output()));
break;
+ case DataType::S32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<int32_t>(input()), getTensorShape(output()),
+ getTensorData<int32_t>(output()));
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index dba39050c..40207090b 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -187,7 +187,7 @@ void GraphLoader::loadTensors()
const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
- throw std::runtime_error("Unknown Custom Node, yet.");
+ throw std::runtime_error("Unsupported Custom operator. " + node->name());
if (!isTensorProducingNode(node))
continue;
diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
index decccaa1d..501e84752 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Add.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleAdd *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
index 0ee367748..f3ca55744 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleArgMax *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
const Tensor *axis = helper.getInputTensor(node->dimension());
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
index efb011257..a8135706f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleAveragePool2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->value());
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
index aae3dbab1..9da2f6d93 100644
--- a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
assert(node->arity() == 2);
const Tensor *lhs = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
index 33d0e2db6..ac6ebb30f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleBatchToSpaceND *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
index 21ea5ceab..a16354c96 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleCast *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
assert(node->arity() == 1);
diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
index 7823a9967..ba2564ea2 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleConcatenation *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
std::vector<const Tensor *> inputs(node->numValues());
for (uint32_t i = 0; i < node->numValues(); ++i)
{
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
index b48d97d19..218165e20 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleConv2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
index 0310fb23f..174946367 100644
--- a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDepthToSpace *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
index db26ecf2e..8af1e3b58 100644
--- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
index 4aae56469..787322e9b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
const Tensor *input = helper.getInputTensor(node->input());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
index 56c2e98f2..0611dfdab 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Div.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDiv *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
const Tensor *input2 = helper.getInputTensor(node->y());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
index 98ee78be7..a79985e3b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleElu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
index 649d9bfe9..59692883f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -25,9 +25,7 @@ std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
index 411d142c3..30d11cb89 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleExp *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644
index 000000000..3aefdf1c5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
+ assert(node->arity() == 2);
+
+ const auto dims = helper.getInputTensor(node->dims());
+ const auto value = helper.getInputTensor(node->value());
+ auto output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
index 6d8435f6c..e0a223116 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleFloor *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
index cae2e186e..a45d89e38 100644
--- a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleFloorDiv *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
index 0b8ac44bd..b7b742b8a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleFullyConnected *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
index 9df9775c5..2ee2906e0 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
assert(node->arity() == 2);
const Tensor *params = helper.getInputTensor(node->params());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
index 3db11b840..80aa63cf0 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleGreater *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
index dbe051d67..272f2843b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleGreaterEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp
index 5983f4d3b..3ac7d4941 100644
--- a/compiler/luci-interpreter/src/loader/nodes/If.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleIf *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
assert(node->arity() == 1 + node->input_count());
assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
index 0a8fb85e2..06031e5bc 100644
--- a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleInstanceNorm *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
index 05f920266..6e22e6d4e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleL2Normalize *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
index 0e70afafa..95b55896f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleL2Pool2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->value());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
index 7b229ad0e..bbf5067b1 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLeakyRelu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
index 81156f275..ae914ecc9 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Less.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLess *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
index 82141e5ae..f1b424b55 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLessEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
index a12dce0a0..962ca2d7c 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -25,9 +25,7 @@ std::unique_ptr<Kernel>
build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
index 6cf547aae..432204115 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogSoftmax *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->logits());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
index 2c9549f71..bf3cb671a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogicalAnd *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
index 3d327d6c4..fefcd9a06 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogicalNot *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
index 50566bb30..a416cb401 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogicalOr *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
index e4160edb3..4a69deef1 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogistic *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
index 914f22838..f66a206ca 100644
--- a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMaxPool2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->value());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
index dc50d6773..d0bff776a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMaximum *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
index 97d91207f..0dec63e79 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMean *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
index ff659524a..1a49c1090 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMinimum *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
index ebf294583..b221b4574 100644
--- a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMirrorPad *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
index 4f9da967d..f9984853a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMul *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
index 23c00537b..9a9ecf991 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleNeg *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
index 8e5711fc1..3916a5854 100644
--- a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleNotEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
index e31601bf6..f3d700c95 100644
--- a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePRelu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
index 699472081..efc5850e0 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePack *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
assert(node->arity() == node->values_count());
std::vector<const Tensor *> inputs(node->values_count());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
index 770549295..67ce997a7 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePad *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
index 12deb15f0..e378a972a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePadV2 *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
index b430bc94f..d32fc3dbb 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePow *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
index fd9836345..cb36fb6da 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -24,9 +24,8 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
+ assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp
new file mode 100644
index 000000000..1a8522dd6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReduceMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReduceMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReduceMax *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::ReduceMax>(input, axes, output, temp_index, resolved_axes,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
index d53a66a06..1d64c1c4e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleRelu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
index f1b5d219b..e50cd2545 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleRelu6 *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
index 89e3ecebf..76ddd88a3 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleReshape *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->tensor());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
index dca56588d..dc2b88ad3 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleResizeBilinear *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
index d1ea19c0f..c7058ae78 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -25,9 +25,7 @@ std::unique_ptr<Kernel>
build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
index ea00f5408..c1a7f5350 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleReverseV2 *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->tensor());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
index ff87f435c..0714a5dba 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleRsqrt *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
index 89528d5ee..d172ef438 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -24,9 +24,8 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSVDF *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ assert(node->arity() == 5);
const Tensor *input = helper.getInputTensor(node->input());
const Tensor *feature = helper.getInputTensor(node->weight_feature());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644
index 000000000..d1edbc794
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
+ assert(node->arity() == 1);
+
+ const auto input = helper.getInputTensor(node->input());
+ auto output = helper.getOutputTensor(node);
+
+ ShapeParams shape_params{};
+ shape_params.out_type = node->out_type();
+
+ return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
index 741cd0806..60ac6417c 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSlice *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
index b15e4b6f3..f41f63f6f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSoftmax *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->logits());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
index 91c237aa5..b6e6cf516 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSpaceToBatchND *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
index 3cbbd9718..63fdb95ec 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSpaceToDepth *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
index 32553ad5e..3f6d4a7df 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Split.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSplit *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
assert(node->arity() == 2);
assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
index d78816447..0788822ca 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSplitV *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
assert(node->arity() == 3);
assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
index 56dd986f1..b9843fe0b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSqrt *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
index 43aadb969..0ad7c1772 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Square.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSquare *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
index 6a2717aa2..e4c6fd851 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSquaredDifference *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
index 583ff9314..6885f8077 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSqueeze *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
index fe5fa7707..359b4e3e9 100644
--- a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleStridedSlice *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
assert(node->arity() == 4);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
index bad4fbb13..a6252cb53 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSub *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
index f4255291b..a58ef60a8 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleTanh *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
index 4e095fbbc..ea17d8311 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleTranspose *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->a());
diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
index 1b954c35c..d773e301e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleTransposeConv *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
assert(node->arity() == 4);
const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
index 978c738c6..a1c0d323a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleUnpack *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
assert(node->arity() == 1);
assert(output_nodes.size() == static_cast<size_t>(node->num()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp
index 284dc0c68..8fde6ec8a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/While.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleWhile *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
assert(node->arity() == node->input_count());
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
index c8a2e12e1..642cf14a3 100644
--- a/compiler/luci-micro/CMakeLists.txt
+++ b/compiler/luci-micro/CMakeLists.txt
@@ -15,7 +15,7 @@ set(CMAKE_ARM_OPTIONS
-DLUCI_STATIC=ON
-DBUILD_CMSIS_NN_FUNCTIONS=ON
-DTARGET_CPU=cortex-m7
- "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake"
+ "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
"-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
"-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
"-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
diff --git a/compiler/luci-micro/luci-interpreter/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..1f7acee87
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
+endif()
+
+set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+ set(LUCI_INTERPRETER_SUFFIX "")
+else()
+ set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
+add_subdirectory(src)
diff --git a/compiler/luci-micro/luci-interpreter/README.md b/compiler/luci-micro/luci-interpreter/README.md
new file mode 100644
index 000000000..77ec5c81c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/README.md
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See `compiler/luci/lang` directory for details about IR.
+You can find useful infrastructure, like importer/exporter, optimizations in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in `luci-interpreter/include/luci_interpreter` directory
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+Interpreter object is reusable and can run multiple inferences.
+Elements in tensors (input/output/internal) are stored contiguously and have C-like layout:
+This means for tensor t=[[0, 1],[2, 3]], t[0,1] == 1.
+
+Input and output tensors have the same indexes as in original luci model.
+
+**Usage example:**
+``` c++
+// Note getTensorSize is a function that computes tensor size,
+// it is not part of interpreter and should be implemented by user
+
+luci_interpreter::Interpreter interpreter(luci_module);
+
+// Set inputs
+// assuming model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
+
+## Inspecting intermediate state
+
+Interpreter provides interfaces to investigate internal state of interpreter during inference.
+
+This is done by "observer" mechanism:
+- `Interpreter` class has `attachObserver` method, which takes pointer to `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods the user can override to inject custom code
+
+ExecutionObserver provides three callbacks:
+- `postTensorWrite` checks contents of output tensor after operation execution
+- `preOperatorExecute` notifies that interpreter is going to execute operation
+- `postOperatorExecute` notifies that interpreter has finished execution of an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for this interface details.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+ void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+ {
+ if (tensor->element_type() != loco::DataType::FLOAT32)
+ return;
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ std::cout << tensor->data<float>()[i] << ", ";
+ }
+
+ // User observer can override only needed methods,
+ // others will inherit empty implementation from base observer.
+
+ // void preOperatorExecute(const luci::CircleNode *node);
+ // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+Interpreter provides a handle for altering default memory management mechanisms.
+
+This is done by the `MemoryManager` interface, see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details.
+
+This header contains the `IMemoryManager` abstract class, which is responsible for allocation and deallocation of tensors' memory.
+
+User can construct an interpreter with one of predefined memory managers or their own custom memory manager.
+Note that one memory manager could be shared between multiple interpreter instances, because an interpreter does not own the manager object.
+
+List of predefined memory managers:
+- `SimpleMemoryManager` A simple wrapper around new/delete; this is the default.
+- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests.
+- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete.
+- `StaticMemoryManager` Uses a precomputed memory allocation plan. Requires preparation with MemoryPlanner, but can reduce memory consumption in restricted environments (like MCUs).
+
+**SimpleMemoryManager usage example:**
+
+No need to select anything to use this memory manager.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool. That allows using static memory arrays for allocations.
+
+Limitations
+- Current implementation uses only lower power-of-two bytes of given buffer.
+
+ For example for 1000 bytes buffer, only lower 512 bytes will be used.
+- Current implementation can handle maximum 4 gigabyte memory pool
+
+``` c++
+ constexpr int buffer_size = 2048;
+ static uint8_t buffer[buffer_size];
+ luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+ luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
new file mode 100644
index 000000000..205baa626
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
@@ -0,0 +1,144 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ struct Block
+ {
+ Block *next_free;
+ bool is_free;
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ Block *_start_block;
+ int32_t _num_blocks;
+ uint32_t _size;
+ Block *_free_blocks[32]{};
+
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
+
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
+ void divideBlock(Block *block, int32_t l)
+ {
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644
index 000000000..375b1ae20
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from
+ * model's file.
+ *
+ * @warning Use this source only in case when model's buffer alive longer than Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
new file mode 100644
index 000000000..8e2f457a5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include "luci_interpreter/MemoryManager.h"
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ExecutionObserver
+{
+public:
+ virtual ~ExecutionObserver();
+
+ // Called when the value of a tensor has been updated during execution.
+ virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor);
+
+ // Called before / after executing an operator.
+ // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput,
+ // CircleConst and Circle*Out).
+ virtual void preOperatorExecute(const luci::CircleNode *node);
+ virtual void postOperatorExecute(const luci::CircleNode *node);
+};
+
+class Interpreter
+{
+public:
+ explicit Interpreter(const luci::Module *module);
+
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
+
+ ~Interpreter();
+
+ void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
+
+ void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);
+
+ void interpret();
+
+ void attachObserver(ExecutionObserver *observer);
+
+ const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
+
+private:
+ // _default_memory_manager should be before _runtime_module due to
+ // the order of deletion in the destructor
+ std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
+ std::unique_ptr<class RuntimeModule> _runtime_module;
+
+ // Observer functionality support.
+ std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+ std::unique_ptr<class EventNotifier> _event_notifier;
+ std::vector<ExecutionObserver *> _observers;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
new file mode 100644
index 000000000..f32c52095
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+namespace luci_interpreter
+{
+
+class IMemoryManager
+{
+public:
+ virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
+ virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
+
+ virtual ~IMemoryManager() = default;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
new file mode 100644
index 000000000..658a1c609
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
new file mode 100644
index 000000000..ded7bde79
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Used for allocations in static buffer, using offsets defined in luci model.
+class StaticMemoryManager : public IMemoryManager
+{
+public:
+ StaticMemoryManager() = delete;
+
+ explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
+ { /* Do nothing */
+ }
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ // Stores a pointer to the beginning of the allocated memory buffer.
+ uint8_t *_buffer_ptr;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
new file mode 100644
index 000000000..397bbed76
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory Manager for use in kernel tests. It eliminates the need to manually delete the
+// allocated memory in tests: this memory manager remembers all of its allocations and
+// its destructor releases all of them.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+ ~TestMemoryManager() override
+ {
+ for (auto allocation : allocations)
+ {
+ delete[] allocation;
+ }
+ }
+
+private:
+ std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
new file mode 100644
index 000000000..27bf719b5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cstddef>
+
+namespace luci_interpreter
+{
+
+using DataType = loco::DataType;
+
+template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>;
+
+inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); }
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
new file mode 100644
index 000000000..bb9ff6d4a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class Shape
+{
+public:
+ explicit Shape(int rank) : _dims(rank, 0) {}
+
+ Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}
+
+ int num_dims() const { return _dims.size(); }
+
+ int32_t dim(int i) const
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t &dim(int i)
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t num_elements() const
+ {
+ int32_t result = 1;
+ for (const int32_t dim : _dims)
+ {
+ result *= dim;
+ }
+ return result;
+ }
+
+ bool operator==(const Shape &other) const { return _dims == other._dims; }
+
+ bool operator!=(const Shape &other) const { return !operator==(other); }
+
+private:
+ std::vector<int32_t> _dims;
+};
+
+// Tensor affine quantization parameters.
+//
+// The relationship between real and quantized values:
+// real_value = (quantized_value - zero_point) * scale
+//
+// In per-tensor case, 'scale' and 'zero_point' are one element each.
+// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size
+// of the quantized dimension.
+//
+// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
+// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
+struct AffineQuantization
+{
+ std::vector<float> scale;
+ std::vector<int32_t> zero_point;
+ int32_t quantized_dimension;
+};
+
+class Tensor
+{
+public:
+ Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name);
+
+ DataType element_type() const { return _element_type; }
+
+ const Shape &shape() const { return _shape; }
+
+ float scale() const
+ {
+ assert(_quantization.scale.size() == 1);
+ return _quantization.scale[0];
+ }
+
+ int32_t zero_point() const
+ {
+ assert(_quantization.zero_point.size() == 1);
+ return _quantization.zero_point[0];
+ }
+
+ const std::vector<float> &scales() const { return _quantization.scale; }
+
+ const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
+
+ int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
+
+ template <typename T> const T *data() const
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<const T *>(_data);
+ }
+
+ template <typename T> T *data()
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<T *>(_data);
+ }
+
+ const std::string &name() const { return _name; }
+
+ void readData(void *data_ptr, size_t data_size) const;
+
+ void writeData(const void *data_ptr, size_t data_size);
+
+ void resize(const Shape &new_shape);
+
+ void set_data_buffer(uint8_t *buffer)
+ {
+ if (buffer == nullptr)
+ {
+ _data_allocated = false;
+ }
+ else
+ {
+ _data_allocated = true;
+ }
+ _data = buffer;
+ }
+
+ bool is_observable() const { return _is_observable; }
+
+ void set_observable(bool value) { _is_observable = value; }
+
+ bool is_allocatable() const { return _is_allocatable; }
+
+ void set_allocatable(bool value) { _is_allocatable = value; }
+
+ bool is_data_allocated() const { return _data_allocated; }
+
+ int32_t get_offset() const { return _offset; }
+
+ void set_offset(int32_t offset) { _offset = offset; }
+
+private:
+ DataType _element_type;
+ Shape _shape;
+ AffineQuantization _quantization;
+ uint8_t *_data;
+ std::string _name;
+ bool _data_allocated;
+ // Write of tensor is reported to registered Observers only if this tensor is observable
+ // This is needed for tensors used in kernel implementation, but not present in original model.
+ bool _is_observable = true;
+ // Memory manager is called for tensor only if it is "allocatable".
+ // Kernel configuration could disable allocation of some tensors if they are not needed for
+ // particular operation.
+ bool _is_allocatable = true;
+ // Used by static memory manager.
+ // Stores the offset from the beginning of the allocated memory buffer.
+ int32_t _offset = -1;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 000000000..f0df58db3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,62 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644
index 000000000..a274afb7e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(scratchpad_data != nullptr);
+
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ assert(batches == 1);
+
+ const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = 1;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = 1;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = depth;
+
+ cmsis_nn_pool_params pool_params;
+ pool_params.stride.h = params.stride_height;
+ pool_params.stride.w = params.stride_width;
+ pool_params.padding.h = params.padding_values.height;
+ pool_params.padding.w = params.padding_values.width;
+ pool_params.activation.min = params.quantized_activation_min;
+ pool_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = 1;
+ filter_dims.h = params.filter_height;
+ filter_dims.w = params.filter_width;
+ filter_dims.c = 1;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+ auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+ output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ if (input_data_type == luci_interpreter::DataType::S8)
+ {
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 000000000..cfb84ea60
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
+ conv_params.dilation.w == 1)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_height;
+ filter_dims.w = filter_width;
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = output_height;
+ output_dims.w = output_width;
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+ &filter_dims, &output_dims);
+
+ luci_interpreter::Shape scratchpad_shape{buf_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..120dcd803
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)output_data;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+ assert(dw_conv_params.dilation.h == 1);
+ assert(dw_conv_params.dilation.w == 1);
+
+ dw_conv_params.input_offset = params.input_offset;
+ dw_conv_params.output_offset = params.output_offset;
+ dw_conv_params.stride.h = params.stride_height;
+ dw_conv_params.stride.w = params.stride_width;
+ dw_conv_params.padding.h = params.padding_values.height;
+ dw_conv_params.padding.w = params.padding_values.width;
+
+ dw_conv_params.activation.min = params.quantized_activation_min;
+ dw_conv_params.activation.max = params.quantized_activation_max;
+ dw_conv_params.ch_mult = params.depth_multiplier;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ int32_t output_multiplier = params.output_multiplier;
+ int32_t output_shift = params.output_shift;
+
+ quant_params.multiplier = &output_multiplier;
+ quant_params.shift = &output_shift;
+
+ assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+ input_data, &filter_dims, filter_data, &bias_dims,
+ bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+ dw_conv_params.dilation.w == 1)
+ {
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+ &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation doesn't support
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int batches = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(1);
+
+ const int filter_dim_count = filter_shape.DimensionsCount();
+ const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+ cmsis_nn_fc_params fc_params;
+ fc_params.input_offset = params.input_offset;
+ fc_params.output_offset = params.output_offset;
+ fc_params.filter_offset = params.weights_offset;
+ fc_params.activation.min = params.quantized_activation_min;
+ fc_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_tensor_quant_params quant_params;
+ quant_params.multiplier = params.output_multiplier;
+ quant_params.shift = params.output_shift;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = 1;
+ input_dims.w = 1;
+ input_dims.c = accum_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = accum_depth;
+ filter_dims.h = 1;
+ filter_dims.w = 1;
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = 1;
+ output_dims.w = 1;
+ output_dims.c = output_depth;
+
+ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+ auto buffer = std::make_unique<int8_t[]>(buf_size);
+ assert(buffer != nullptr);
+
+ cmsis_nn_context ctx;
+ ctx.buf = buffer.get();
+ ctx.size = buf_size;
+
+ auto res =
+ arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+ filter_data, &bias_dims, bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644
index 000000000..a4a5b2a78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = input_shape.Dims(0);
+ input_dims.h = input_shape.Dims(1);
+
+ cmsis_nn_dims weights_feature_dims;
+ weights_feature_dims.n = weight_feature_shape.Dims(0);
+ weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+ cmsis_nn_dims weights_time_dims;
+ weights_time_dims.n = weight_time_shape.Dims(0);
+ weights_time_dims.h = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = bias_shape.Dims(0);
+
+ cmsis_nn_dims state_dims;
+ state_dims.n = batch_size;
+ state_dims.h = memory_size * num_filters;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = output_shape.Dims(0);
+ output_dims.h = output_shape.Dims(1);
+
+ cmsis_nn_svdf_params svdf_params;
+ svdf_params.rank = params.rank;
+ svdf_params.input_offset = input_zp;
+ svdf_params.output_offset = output_zp;
+
+ svdf_params.input_activation.min = INT16_MIN;
+ svdf_params.input_activation.max = INT16_MAX;
+
+ svdf_params.output_activation.min = INT8_MIN;
+ svdf_params.output_activation.max = INT8_MAX;
+
+ cmsis_nn_per_tensor_quant_params in_quant_params;
+ in_quant_params.multiplier = scale_1_a;
+ in_quant_params.shift = scale_1_b;
+
+ cmsis_nn_per_tensor_quant_params out_quant_params;
+ out_quant_params.multiplier = scale_2_a;
+ out_quant_params.shift = scale_2_b;
+
+ cmsis_nn_context scratch_ctx;
+ scratch_ctx.buf = scratchpad_data;
+
+ cmsis_nn_context scratch_output_ctx;
+ scratch_output_ctx.buf = output_temp_data;
+
+ arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+ &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+ weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h
new file mode 100644
index 000000000..6bbda4867
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation doesn't support on mcu
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+template <>
+inline void Softmax<int8_t>(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int32_t mult = params.input_multiplier;
+ const int32_t shift = params.input_left_shift;
+ const int32_t diff_min = params.diff_min;
+
+ arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 000000000..a68b363d9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,65 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+ nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+ "${CMSISSource_DIR}/CMSIS/NN/Include"
+ "${CMSISSource_DIR}/CMSIS/DSP/Include"
+ "${CMSISSource_DIR}/CMSIS/Core/Include")
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644
index 000000000..8e20559f9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -0,0 +1,77 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LocalResponseNormalization)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(LogSoftmax)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Mean)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
+REGISTER_KERNEL(Pack)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(Pow)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Relu)
+REGISTER_KERNEL(Relu6)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(ReverseV2)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Slice)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(Split)
+REGISTER_KERNEL(SplitV)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(Unpack)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation doesn't support
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644
index 000000000..3894f2d92
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+ const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+ luci_interpreter::Tensor *rhs_scratchpad,
+ const tflite::RuntimeShape &lhs_shape,
+ const tflite::RuntimeShape &rhs_shape)
+{
+ // Scratchpad for transposed LHS
+ {
+ auto lhs_rank = lhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(lhs_rank);
+ for (int i = 0; i < lhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = lhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1);
+ scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2);
+
+ lhs_scratchpad->resize(scratchpad_size);
+ }
+ // Scratchpad for transposed RHS
+ {
+ auto rhs_rank = rhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(rhs_rank);
+ for (int i = 0; i < rhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = rhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1);
+ scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2);
+
+ rhs_scratchpad->resize(scratchpad_size);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h
new file mode 100644
index 000000000..3fe2022ed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h
new file mode 100644
index 000000000..985a15f39
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ if (scratchpad_data)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+ input_depth * filter_height * filter_width};
+
+ tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ scratchpad_data);
+ }
+ else
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ // TODO This should only be done once (although it takes only a few microseconds).
+ // Also, the user should be able to adjust the number of threads.
+ auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
+ gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, gemmlowp_context.get());
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ // TODO enable optimized version
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+
+ // Allocate tensor for scratchpad, if needed.
+ // The checks here should be aligned with the actual implementation.
+ const bool need_dilated_scratchpad =
+ params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+ const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 ||
+ filter_height != 1 || filter_width != 1;
+ auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 &&
+ (need_dilated_scratchpad || need_non_dilated_scratchpad);
+
+ if (_need_scratchpad)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+ int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height *
+ filter_width * data_type_size;
+ luci_interpreter::Shape scratchpad_shape{scratchpad_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h
new file mode 100644
index 000000000..f9ebfcfb5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)output_data;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644
index 000000000..3af6d0777
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h
new file mode 100644
index 000000000..cb365ffd0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644
index 000000000..62970dbf7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+    // MARK: At this moment this operation does not support this type; only the int8_t specialization below is implemented
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h
new file mode 100644
index 000000000..49ac35f93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T, typename CoordsT = int32>
+static inline void Gather(const tflite::GatherParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h
new file mode 100644
index 000000000..6c663e21f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h
new file mode 100644
index 000000000..aac57f2b2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h
new file mode 100644
index 000000000..e8209bae6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
new file mode 100644
index 000000000..54f7f0916
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h
new file mode 100644
index 000000000..a32e3eec6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale,
+ const tflite::RuntimeShape &input_shape, const uint8 *input_data,
+ const tflite::RuntimeShape &output_shape, uint8 *output_data)
+{
+ tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h
new file mode 100644
index 000000000..a8a9d4abc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+
+template <>
+inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const int64_t *input1_data, const tflite::RuntimeShape &input2_shape,
+ const int64_t *input2_data, const tflite::RuntimeShape &output_shape,
+ int64_t *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h
new file mode 100644
index 000000000..797ffee1b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644
index 000000000..bf1d7954e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h
new file mode 100644
index 000000000..b4c715d3e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_H
+#define LUCI_INTERPRETER_PAL_RELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h
new file mode 100644
index 000000000..bf2f91aa5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU6_H
+#define LUCI_INTERPRETER_PAL_RELU6_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU6_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h
new file mode 100644
index 000000000..7380081dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..74d19265b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644
index 000000000..0ffba14f0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, activation_state_data, output_shape,
+ output_data, scratchpad_data, output_temp_data, scale_1_a,
+ scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, scratchpad_data,
+ activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h
new file mode 100644
index 000000000..640a71684
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SLICE_H
+#define LUCI_INTERPRETER_PAL_SLICE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Slice(const tflite::SliceParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h
new file mode 100644
index 000000000..b197e79d1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+template <typename In, typename Out>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const In *input_data,
+ const tflite::RuntimeShape &output_shape, Out *output_data)
+{
+ tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h
new file mode 100644
index 000000000..5e8de9ba3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h
new file mode 100644
index 000000000..52d2a5bb1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h
new file mode 100644
index 000000000..4d8da72d8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPLIT_H
+#define LUCI_INTERPRETER_PAL_SPLIT_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename Scalar>
+static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape,
+ const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes,
+ Scalar *const *output_data)
+{
+ tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPLIT_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h
new file mode 100644
index 000000000..04080d619
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
new file mode 100644
index 000000000..185700cf9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
@@ -0,0 +1,82 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ # TODO(reviewer): restore the previous source layout — instead of compiling
+ # these TF Lite sources directly, add sources with visitors in this library
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+ if(BUILD_ARM32_NEON)
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+ )
+ endif(BUILD_ARM32_NEON)
+
+ add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644
index 000000000..f0df58db3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -0,0 +1,62 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation doesn't support
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644
index 000000000..13976877a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)params;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)output_data;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..048624d74
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 000000000..3bba668fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int n_rank = params.rank;
+ const int n_batch = input_shape.Dims(0);
+ const int n_input = input_shape.Dims(1);
+ const int n_filter = weight_feature_shape.Dims(0);
+ const int n_unit = n_filter / n_rank;
+ const int n_memory = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ int16_t *new_state_start = activation_state_data;
+ const int16_t *old_state_start = activation_state_data + 1;
+ const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Feature matmul.
+ {
+ const int32_t output_max = std::numeric_limits<int16_t>::max();
+ const int32_t output_min = std::numeric_limits<int16_t>::min();
+ int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int8_t *matrix_ptr = weight_feature_data;
+ for (int r = 0; r < n_filter; r++)
+ {
+ int32_t dot_prod = 0;
+ const int8_t *vector_in_batch = input_data + b * n_input;
+ for (int c = 0; c < n_input; c++)
+ {
+ dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+ }
+ dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+ dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+ // This assumes state is symmetrically quantized. Otherwise last bit of
+ // state should be initialized to its zero point and accumulate the
+ // dot_prod.
+ // Equivalent as the following:
+ // result_in_batch = zero point, which happens to be zero.
+ // result_in_batch += dot_prod_56.
+ *result_in_batch = dot_prod;
+ result_in_batch += n_memory;
+ }
+ }
+ }
+
+ // Time.
+ {
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Perform batched vector dot product:
+ const int16_t *vector1_ptr = weight_time_data;
+ const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+ for (int i = 0; i < n_filter; i++)
+ {
+ *scratch_ptr_batch = 0;
+ for (int j = 0; j < n_memory; j++)
+ {
+ *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+ }
+ scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Reduce, add bias, rescale, activation.
+ {
+ // Add bias.
+ if (bias_data)
+ {
+ // Vector batch assign:
+ for (int i = 0; i < n_batch; ++i)
+ {
+ int32_t *output_ptr = output_temp_data + i * n_unit;
+ const int32_t *bias_ptr = bias_data;
+ for (int j = 0; j < n_unit; ++j)
+ {
+ *output_ptr++ = *bias_ptr++;
+ }
+ }
+ }
+ else
+ {
+ int32_t *output_ptr = output_temp_data;
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ *output_ptr++ = 0;
+ }
+ }
+
+ // Reduce.
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Reduction sum vector
+ for (int i = 0; i < n_unit; ++i)
+ {
+ for (int j = 0; j < n_rank; ++j)
+ {
+ output_temp_ptr[i] += *scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Rescale.
+ const int32_t output_max = std::numeric_limits<int8_t>::max();
+ const int32_t output_min = std::numeric_limits<int8_t>::min();
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ int32_t x1 = output_temp_data[i];
+ int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+ int32_t x3 = x2 + output_zp;
+ int32_t x4 = std::min(std::max(output_min, x3), output_max);
+ output_data[i] = static_cast<int8_t>(x4);
+ }
+ }
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644
index 000000000..9838b542d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation doesn't support on mcu
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
new file mode 100644
index 000000000..907d51de6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
@@ -0,0 +1,56 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+ #find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # TODO: revisit this — instead of compiling these TensorFlow sources here,
+  # consider adding sources with visitors in this library.
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_mcu_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
+ #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/requires.cmake b/compiler/luci-micro/luci-interpreter/requires.cmake
new file mode 100644
index 000000000..f411f387a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/requires.cmake
@@ -0,0 +1 @@
+require(luci)
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
new file mode 100644
index 000000000..6ad1f320c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+ int32_t p = lowerLog2(memSize);
+
+ // We assume that the requested size of memory does not exceed 4 GB
+ assert(p < 32);
+ memSize = 1 << p;
+
+ _start_block = reinterpret_cast<Block *>(memory_start);
+ _start_block->size = memSize - sizeof(Block);
+ _start_block->is_free = true;
+ _start_block->self = _start_block;
+ _num_blocks = 0;
+ _size = _start_block->size;
+
+ for (auto &_free_block : _free_blocks)
+ _free_block = nullptr;
+
+ addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ auto footprint = size + sizeof(Block);
+ auto l = (footprint & (footprint - 1)) == 0
+ ? lowerLog2(footprint)
+ : lowerLog2(footprint) + 1; // check footprint is pow_of_2
+
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ assert(l < 32);
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ auto data = tensor.data<void>();
+ auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+ assert(tmp->self == tmp);
+
+ tmp->is_free = true;
+ addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
+ while (tmp)
+ if (tmp->size == _size)
+ break;
+ else
+ tmp = mergeBlock(tmp);
+
+ _num_blocks--;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
new file mode 100644
index 000000000..29fb767b7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+ auto mem_pool = std::make_unique<uint8_t[]>(200);
+ auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+ Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+ buddy_memory_manager->allocate_memory(first_tensor);
+
+ uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ first_tensor.writeData(data_1, 8);
+ uint8_t array_1[8];
+ first_tensor.readData(array_1, 8);
+ for (int i = 0; i < 8; i++)
+ {
+ EXPECT_EQ(data_1[i], array_1[i]);
+ }
+
+ Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+ buddy_memory_manager->allocate_memory(second_tensor);
+
+ uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+ second_tensor.writeData(data_2, 10);
+
+ uint8_t array_2[2][5];
+ second_tensor.readData(array_2, 10);
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 5; j++)
+ {
+ EXPECT_EQ(data_2[i][j], array_2[i][j]);
+ }
+ }
+
+ buddy_memory_manager->release_memory(first_tensor);
+ EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+ buddy_memory_manager->release_memory(second_tensor);
+ EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
new file mode 100644
index 000000000..997b75a84
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
@@ -0,0 +1,61 @@
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
+
+initialize_pal()
+
+if (NOT PAL_INITIALIZED)
+  message("PAL failed to initialize; skipping luci-interpreter")
+ return()
+endif()
+
+message(STATUS "LUCI INTERPRETER BEGIN")
+
+set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
+
+add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
+add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
+add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
+
+message(STATUS "LUCI INTERPRETER INITIALIZED")
+
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
+ Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+if (NOT LUCI_INTERPRETER_STATIC)
+ add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
+else ()
+ add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+endif ()
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
+ PRIVATE nncc_common)
+
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
diff --git a/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
new file mode 100644
index 000000000..8cf272efd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+#include "loader/ModuleLoader.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+class EventNotifierImpl final : public EventNotifier
+{
+public:
+ EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
+ const std::vector<ExecutionObserver *> &observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ {
+ }
+
+ void postTensorWrite(const Tensor *tensor) override
+ {
+ assert(tensor != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor);
+ }
+ }
+
+ void preOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+ void postOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+private:
+ const RuntimeToIR &_runtime_to_ir;
+ const std::vector<ExecutionObserver *> &_observers;
+};
+
+} // namespace
+
+Interpreter::Interpreter(const luci::Module *module)
+{
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _default_memory_manager.get());
+ loader.load();
+}
+
+Interpreter::Interpreter(const luci::Module *module,
+ luci_interpreter::IMemoryManager *memory_manager)
+{
+ assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ memory_manager);
+ loader.load();
+}
+
+Interpreter::~Interpreter() = default;
+
+void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = input_node->name();
+ throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->writeData(data, data_size);
+}
+
+void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = output_node->name();
+ throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->readData(data, data_size);
+}
+
+void Interpreter::interpret() { _runtime_module->execute(); }
+
+void Interpreter::attachObserver(ExecutionObserver *observer)
+{
+ if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend())
+ throw std::runtime_error("Observer is already attached.");
+ _observers.push_back(observer);
+}
+
+ExecutionObserver::~ExecutionObserver() = default;
+
+void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {}
+
+void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {}
+
+void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
new file mode 100644
index 000000000..230e39896
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ tensor.set_data_buffer(data);
+}
+
+void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_data_allocated())
+ {
+ tensor.set_data_buffer(nullptr);
+ return;
+ }
+ auto data = tensor.data<uint8_t>();
+ delete[] data;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
new file mode 100644
index 000000000..73a819919
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ int32_t offset = tensor.get_offset();
+ assert(offset >= 0);
+ auto tensor_ptr = _buffer_ptr + offset;
+ tensor.set_data_buffer(tensor_ptr);
+}
+
+void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
new file mode 100644
index 000000000..3beeee55c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ allocations.push_back(data);
+ tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
new file mode 100644
index 000000000..c2471e01c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+ EventNotifier.h
+ Kernel.h
+ KernelParams.h
+ RuntimeGraph.h
+ RuntimeGraph.cpp
+ RuntimeModule.h
+ Tensor.cpp)
+
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
diff --git a/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
new file mode 100644
index 000000000..5c4fbd3be
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+
+namespace luci_interpreter
+{
+
+// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
+class EventNotifier
+{
+public:
+ virtual ~EventNotifier() = default;
+
+ virtual void postTensorWrite(const Tensor *tensor) = 0;
+ virtual void preOperatorExecute(const Kernel *kernel) = 0;
+ virtual void postOperatorExecute(const Kernel *kernel) = 0;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Kernel.h b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
new file mode 100644
index 000000000..a7c4a4218
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNEL_H
+#define LUCI_INTERPRETER_CORE_KERNEL_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Base class for all kernels.
+class Kernel
+{
+protected:
+ Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ {
+ }
+
+public:
+ virtual ~Kernel() = default;
+
+ const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
+
+ // Configures the kernel.
+ // This function is currently called once for each kernel during interpreter construction,
+ // which makes it a convenient place for preparing (resizing) output tensors.
+ virtual void configure() = 0;
+
+ // Executes the kernel.
+ virtual void execute() const = 0;
+
+protected:
+ // NOTE Prefer not to use these in derived classes.
+ const std::vector<const Tensor *> _inputs;
+ const std::vector<Tensor *> _outputs;
+};
+
+// Base class for kernels with parameters.
+template <typename Params> class KernelWithParams : public Kernel
+{
+protected:
+ KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ const Params &params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ {
+ }
+
+public:
+ const Params &params() const { return _params; }
+
+protected:
+ const Params _params;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNEL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
new file mode 100644
index 000000000..6c0220c62
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+
+#include <luci/IR/AttrPadding.h>
+#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/IR/AttrMirrorPadMode.h>
+#include <luci_interpreter/core/DataType.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Inject commonly used types into `luci_interpreter` namespace for convenience.
+using Activation = luci::FusedActFunc;
+using Padding = luci::Padding;
+using MirrorPadMode = luci::MirrorPadMode;
+
+struct AddParams
+{
+ Activation activation;
+};
+
+struct ArgMaxParams
+{
+ DataType output_type;
+};
+
+struct BatchMatMulParams
+{
+ bool adj_x;
+ bool adj_y;
+};
+
+struct ConcatenationParams
+{
+ int axis;
+ Activation activation;
+};
+
+struct Conv2DParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DepthToSpaceParams
+{
+ int block_size;
+};
+
+struct DepthwiseConv2DParams
+{
+ Padding padding;
+ int32_t depth_multiplier; // TODO Remove, as it can be calculated.
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DivParams
+{
+ Activation activation;
+};
+
+struct FullyConnectedParams
+{
+ Activation activation;
+ bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+ int32_t batch_dims;
+};
+
+struct InstanceNormParams
+{
+ float epsilon;
+ Activation activation;
+};
+
+struct L2NormParams
+{
+ Activation activation;
+};
+
+struct LeakyReluParams
+{
+ float alpha;
+};
+
+struct LocalResponseNormalizationParams
+{
+ int32_t radius;
+ float bias;
+ float alpha;
+ float beta;
+};
+
+struct MirrorPadParams
+{
+ MirrorPadMode mode;
+};
+
+struct MulParams
+{
+ Activation activation;
+};
+
+struct OneHotParams
+{
+ int32_t axis;
+};
+
+struct PackParams
+{
+ int32_t values_count;
+ int32_t axis;
+};
+
+struct Pool2DParams
+{
+ Padding padding;
+ int32_t filter_height;
+ int32_t filter_width;
+ int32_t stride_height;
+ int32_t stride_width;
+ Activation activation;
+};
+
+struct ReducerParams
+{
+ bool keep_dims;
+};
+
+struct ResizeBilinearParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ResizeNearestNeighborParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ShapeParams
+{
+ loco::DataType out_type;
+};
+
+struct SubParams
+{
+ Activation activation;
+};
+
+struct SVDFParams
+{
+ bool asymmetric_quantize_inputs;
+ int32_t svdf_rank;
+ Activation activation;
+};
+
+struct SpaceToDepthParams
+{
+ int block_size;
+};
+
+struct SoftmaxParams
+{
+ float beta;
+};
+
+struct StridedSliceParams
+{
+ int32_t begin_mask;
+ int32_t end_mask;
+ int32_t ellipsis_mask;
+ int32_t new_axis_mask;
+ int32_t shrink_axis_mask;
+};
+
+struct SqueezeParams
+{
+ std::vector<int32_t> squeeze_dims;
+};
+
+struct TransposeConvParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+};
+
+struct UnpackParams
+{
+ int axis;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
new file mode 100644
index 000000000..c2f8d2ea8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+
+#include "core/RuntimeModule.h"
+
+#include <algorithm>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class RuntimeGraph::TensorAllocPlan
+{
+ std::vector<std::vector<Tensor *>> _alloc_plan;
+ std::vector<std::vector<Tensor *>> _dealloc_plan;
+ bool _valid = false;
+ IMemoryManager *_memory_manager;
+
+public:
+ explicit TensorAllocPlan(IMemoryManager *memory_manager);
+ void invalidate() { _valid = false; }
+ bool isValid() const { return _valid; }
+ void build(const RuntimeGraph &graph);
+ void allocate(size_t kernel_index) const;
+ void deallocate(size_t kernel_index) const;
+};
+
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+ : _memory_manager(memory_manager)
+{
+}
+
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+ invalidate();
+ using Lifetime = std::pair<size_t, size_t>;
+ std::unordered_map<Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = graph._kernels.size();
+ for (size_t index = 0; index < num_kernels; ++index)
+ {
+ const auto &kernel = graph._kernels[index];
+ for (const Tensor *tensor : kernel->getInputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = index;
+ }
+ for (Tensor *tensor : kernel->getOutputTensors())
+ {
+ assert(lifetimes.count(tensor) == 0);
+ lifetimes[tensor] = Lifetime(index, index);
+ }
+ }
+ for (const Tensor *tensor : graph.getOutputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = num_kernels;
+ }
+ _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ _alloc_plan[item.second.first].push_back(item.first);
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _valid = true;
+}
+
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _alloc_plan.size());
+ for (Tensor *tensor : _alloc_plan[kernel_index])
+ {
+ _memory_manager->allocate_memory(*tensor);
+ }
+}
+
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _dealloc_plan.size());
+ for (Tensor *tensor : _dealloc_plan[kernel_index])
+ {
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+ : _owning_module(owning_module), _memory_manager(memory_manager),
+ _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &tensor : _tensors)
+ {
+ if (tensor->is_data_allocated())
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
+{
+ assert(tensor != nullptr);
+ _tensors.push_back(std::move(tensor));
+ return _tensors.back().get();
+}
+
+void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
+{
+ assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _input_tensors = input_tensors;
+}
+
+void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
+{
+ assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _output_tensors = output_tensors;
+}
+
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+ _memory_manager->allocate_memory(*tensor);
+}
+
+void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
+{
+ assert(kernel != nullptr);
+ _kernels.push_back(std::move(kernel));
+ _tensor_alloc_plan->invalidate();
+}
+
+void RuntimeGraph::execute() const
+{
+ if (!_tensor_alloc_plan->isValid())
+ _tensor_alloc_plan->build(*this);
+
+ EventNotifier *event_notifier = _owning_module->getEventNotifier();
+
+ // Notify the observers that the input tensors have changed.
+ if (event_notifier != nullptr)
+ {
+ for (const Tensor *input_tensor : getInputTensors())
+ {
+ if (input_tensor->is_observable())
+ event_notifier->postTensorWrite(input_tensor);
+ }
+ }
+
+ for (size_t index = 0; index < _kernels.size(); ++index)
+ {
+ const auto &kernel = _kernels[index];
+ if (event_notifier != nullptr)
+ {
+ event_notifier->preOperatorExecute(kernel.get());
+ }
+
+ // TODO The `configure` method should only be called if the outputs of an operator need to be
+ // resized.
+ kernel->configure();
+
+ // Preallocate outputs in advance instead of relying on automatic allocation
+ _tensor_alloc_plan->allocate(index);
+
+ kernel->execute();
+
+ if (event_notifier != nullptr)
+ {
+ event_notifier->postOperatorExecute(kernel.get());
+ }
+
+ for (const Tensor *tensor : kernel->getOutputTensors())
+ {
+ if (event_notifier != nullptr && tensor->is_observable())
+ {
+ event_notifier->postTensorWrite(tensor);
+ }
+ }
+ _tensor_alloc_plan->deallocate(index);
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644
index 000000000..8184e249d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+#include "core/Kernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+class RuntimeGraph
+{
+private:
+ class TensorAllocPlan;
+ friend class TensorAllocPlan;
+
+public:
+ explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
+ ~RuntimeGraph();
+
+ Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
+
+ void setInputTensors(const std::vector<Tensor *> &input_tensors);
+ void setOutputTensors(const std::vector<Tensor *> &output_tensors);
+
+ void configureAllocations(Tensor *tensor);
+
+ const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
+
+ void addKernel(std::unique_ptr<Kernel> &&kernel);
+
+ void execute() const;
+
+private:
+ IMemoryManager *_memory_manager;
+ RuntimeModule *_owning_module;
+ std::vector<std::unique_ptr<Tensor>> _tensors;
+ std::vector<Tensor *> _input_tensors;
+ std::vector<Tensor *> _output_tensors;
+
+ // Kernels in execution order.
+ std::vector<std::unique_ptr<Kernel>> _kernels;
+ // Tensors that are not used anymore after given op
+ std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644
index 000000000..78873b0ec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule
+{
+public:
+ explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}
+
+ EventNotifier *getEventNotifier() const { return _event_notifier; }
+
+ RuntimeGraph *addGraph(IMemoryManager *memory_manager)
+ {
+ _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
+ return _graphs.back().get();
+ }
+
+ const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); }
+ const std::vector<Tensor *> &getOutputTensors() const
+ {
+ return getMainGraph()->getOutputTensors();
+ }
+
+ void execute() const { getMainGraph()->execute(); }
+
+private:
+ RuntimeGraph *getMainGraph() const { return _graphs[0].get(); }
+
+ EventNotifier *const _event_notifier;
+ std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
new file mode 100644
index 000000000..3c3c5ffff
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cstring>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
+ std::string name)
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name)), _data_allocated(false)
+{
+}
+
+void Tensor::readData(void *data_ptr, size_t data_size) const
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data_ptr, data<void>(), data_size);
+}
+
+void Tensor::writeData(const void *data_ptr, size_t data_size)
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data<void>(), data_ptr, data_size);
+}
+
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 000000000..dd9733f92
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+ GraphBuilderRegistry.cpp)
+
+# include specific builders
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..a33bca6a4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+ auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+ {
+ // redefine NodeBuilder of BUFFER type
+ builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+ }
+
+ return builder;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 000000000..14e90f240
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// helper struct which describes data loaded to custom_options of CircleReferencingConst node
+struct ConstDataReference
+{
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
+
+ const auto graph = context->graph();
+ const auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ auto const const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Create CircleVariable for variable
+ return nullptr;
+ }
+
+ auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+ if (const_dims.empty() && buffer.empty())
+ {
+ // unknown shape tensor and scalar tensor
+ return nullptr;
+ }
+
+ // if tensor_index is used as output to some other operator, this is not a constant
+ auto tensoroutputs = context->tensoroutputs();
+ if (tensoroutputs->find(tensor_index))
+ {
+ // other operator output tensor
+ return nullptr;
+ }
+
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ num_elements = num_elements * const_dims[r];
+ }
+
+ if (buffer.empty() && num_elements > 0)
+ {
+ // normal empty tensor
+ return nullptr;
+ }
+
+ // create CircleReferencingConst
+ auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+ {
+ custom_node->custom_code("CircleReferencingConst");
+
+ copy_tensor_attributes(const_tensor, custom_node);
+ custom_node->shape_status(luci::ShapeStatus::VALID);
+
+ // custom options stores size of buffer and pointer's value to buffer's data
+ {
+ std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+ {
+ auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+ const_data_ref = {buffer.data(), buffer.size()};
+ }
+ custom_node->custom_options(custom_options);
+ }
+ }
+
+ // Output of CircleCustom node presented with CircleConstNode
+ auto out_node = graph->nodes()->create<CircleCustomOut>();
+ {
+ out_node->index(0);
+ out_node->input(custom_node);
+
+ copy_tensor_attributes(const_tensor, out_node);
+ out_node->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ return out_node;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 000000000..ed8f95124
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
new file mode 100644
index 000000000..d7bf3084f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
+ : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+{
+}
+
+void Add::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Add::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Add::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Add::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Add::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+void Add::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ constexpr int left_shift = 12;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [input1_multiplier, input1_shift, //
+ input2_multiplier, input2_shift, //
+ output_multiplier, output_shift, //
+ activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+ const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
+ const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
+ const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, input1_multiplier, input1_shift);
+ const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, input2_multiplier, input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, output_multiplier, output_shift);
+ const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+ return static_cast<int16_t>(clamped_output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.h b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
new file mode 100644
index 000000000..91d95b6af
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ADD_H
+#define LUCI_INTERPRETER_KERNELS_ADD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Add : public KernelWithParams<AddParams>
+{
+public:
+ Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ADD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
new file mode 100644
index 000000000..b8b1c3089
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Common fixture: each test gets a fresh TestMemoryManager so tensor
// allocations cannot leak between tests.
class AddTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
// Comparison tolerance for quantized Add: one quantization step of a
// 256-level (uint8) scale covering [min, max] — the quantized result must
// never be off by more than a single step.
float GetTolerance(float min, float max)
{
  return (max - min) / 255.0;
}
+
+TEST_F(AddTest, Uint8)
+{
+ std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
+ std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::initializer_list<int32_t> test_shapes[] = {
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::initializer_list<int32_t> output_shapes[] = {
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ std::vector<std::vector<float>> output_data = {
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+ // Re-run with exchanged inputs.
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(AddTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1,
+ 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+ {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+ {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0,
+ 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+ {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+};
+
// Integer Add is exercised through the shared CheckInteger body for both
// supported widths; SUCCEED() marks the test as explicitly passing when the
// helper raises no gtest failures.
TEST_F(AddTest, SInt32)
{
  CheckInteger<loco::DataType::S32>(_memory_manager.get());
  SUCCEED();
}

TEST_F(AddTest, SInt64)
{
  CheckInteger<loco::DataType::S64>(_memory_manager.get());
  SUCCEED();
}
+
// Symmetric int16 quantized Add (zero_point == 0) over the same four
// broadcast patterns, with RELU-clamped float references. The second pass
// exchanges the operands AND uses different input/output scales to exercise
// the rescaling path.
TEST_F(AddTest, SInt16)
{
  Shape base_shape = {2, 3, 1, 2};
  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
  std::vector<std::vector<int32_t>> ref_output_shapes{
    {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};

  std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
                                 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
  std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
  std::vector<std::vector<float>> ref_outputs = {
    {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
     1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
     0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
    {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
    {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
     1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
     0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
    {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};

  for (size_t i = 0; i < test_shapes.size(); ++i)
  {
    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
                                                          _memory_manager.get());
    Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
                                                          input2_data, _memory_manager.get());
    Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
    // Tolerance is one output quantization step.
    const float tolerance = output_tensor.scale();

    AddParams params{};
    params.activation = Activation::RELU;

    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
    kernel.configure();
    _memory_manager->allocate_memory(output_tensor);
    kernel.execute();

    EXPECT_THAT(extractTensorShape(output_tensor),
                ::testing::ElementsAreArray(ref_output_shapes[i]))
      << "With shape number " << i;
    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
      << "With shape number " << i;
  }
  // Re-run with exchanged inputs and different scales.
  for (size_t i = 0; i < test_shapes.size(); ++i)
  {
    Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
                                                          input2_data, _memory_manager.get());
    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
                                                          _memory_manager.get());
    Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
    const float tolerance = output_tensor.scale();

    AddParams params{};
    params.activation = Activation::RELU;

    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
    kernel.configure();
    _memory_manager->allocate_memory(output_tensor);
    kernel.execute();

    EXPECT_THAT(extractTensorShape(output_tensor),
                ::testing::ElementsAreArray(ref_output_shapes[i]))
      << "With shape number " << i;
    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
      << "With shape number " << i;
  }
}
+
+TEST_F(AddTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
// Negative test: S64 inputs with an S32 output must be rejected at
// configure() time.
TEST_F(AddTest, Invalid_Output_Type_NEG)
{
  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S32);

  AddParams params{};
  params.activation = Activation::RELU;

  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Negative test: U64 passes configure() (types match), but there is no U64
// evaluation path, so execute() must throw.
TEST_F(AddTest, Invalid_Input_Type_NEG)
{
  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U64);

  AddParams params{};
  params.activation = Activation::RELU;

  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}

// Negative test: S16 tensors built here without quantization parameters are
// expected to be rejected at configure() time.
TEST_F(AddTest, Invalid_Quantization_NEG)
{
  Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
  Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16);

  AddParams params{};
  params.activation = Activation::NONE;

  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
new file mode 100644
index 000000000..6561a1783
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/Utils.h"
+#include "PALArgMax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Wires {input, axis} as the kernel inputs and {output} as its single output.
ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
  : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
{
}
+
+void ArgMax::configure()
+{
+ assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
+ assert(input()->shape().num_dims() >= 1);
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+ Shape output_shape(num_dims - 1);
+
+ // If axis value is negative, then update by adding input_shape's num_dims.
+ // If updated value also negative, then assert.
+ assert(axis()->shape().num_elements() == 1);
+ int axis_value = getTensorData<int32_t>(axis())[0];
+ if (axis_value < 0)
+ axis_value = axis_value + num_dims;
+ assert(axis_value >= 0);
+
+ int j = 0;
+ for (int i = 0; i < num_dims; i++)
+ {
+ if (i == axis_value)
+ continue;
+ output_shape.dim(j++) = input_shape.dim(i);
+ }
+
+ assert(output()->element_type() == _params.output_type);
+
+ output()->resize(output_shape);
+}
+
// Computes arg-max by delegating to the PAL ArgMinMax routine with
// std::greater as the comparator. The nested switches enumerate every
// supported combination: axis type (S32/S64) x output index type (S32/S64)
// x input type (FLOAT32/U8); anything else throws.
void ArgMax::execute() const
{

#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
  luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
                                  getTensorData<axis_type>(axis()), getTensorShape(output()), \
                                  getTensorData<output_type>(output()), std::greater<data_type>())
  // Axis indices stored as 32-bit integers.
  if (axis()->element_type() == DataType::S32)
  {
    switch (_params.output_type)
    {
      case DataType::S32:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int32_t, int32_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      case DataType::S64:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int32_t, int64_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      default:
        throw std::runtime_error("Unsupported output type.");
    }
  }
  // Otherwise the axis is S64 (asserted in configure()).
  else
  {
    switch (_params.output_type)
    {
      case DataType::S32:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int64_t, int32_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      case DataType::S64:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int64_t, int64_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      default:
        throw std::runtime_error("Unsupported output type.");
    }
  }
#undef TF_LITE_ARG_MAX
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h
new file mode 100644
index 000000000..c851b5891
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ARGMAX_H
+#define LUCI_INTERPRETER_KERNELS_ARGMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Arg-max reduction along the single axis given by the 'axis' tensor; the
// output holds indices whose element type is params.output_type.
class ArgMax : public KernelWithParams<ArgMaxParams>
{
public:
  ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params);

  // Accessors for the tensors wired in by the constructor.
  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axis() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  // configure() resolves the axis and resizes the output;
  // it must run before execute().
  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
new file mode 100644
index 000000000..474f4b321
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> dimension_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
+ std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T1>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor dimension_tensor =
+ makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T2>());
+
+ ArgMaxParams params{};
+ params.output_type = getElementType<T2>();
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
// Typed fixture: the suite below runs once per input element type listed in
// DataTypes.
template <typename T> class ArgMaxTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t>;
TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
+
// Scalar axis tensor (shape {}) selecting the last dimension; each input row
// reduces to the index of its maximum. Both S32 and S64 index outputs are
// checked.
TYPED_TEST(ArgMaxTest, Simple)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
}

// Same as Simple but with two rows, producing one arg-max index per row.
TYPED_TEST(ArgMaxTest, MultiDimensions)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                              1, 2, 7, 8, //
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                              1, 2, 7, 8, //
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
+
// Negative test: a U8 index output matches params.output_type, so
// configure() succeeds, but execute() has no U8 output path and must throw.
TEST(ArgMaxTest, UnsupportedType_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
                                                           {
                                                             1, 2, 7, 8, //
                                                             1, 9, 7, 3, //
                                                           },
                                                           memory_manager.get());
  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8);

  ArgMaxParams params{};
  params.output_type = DataType::U8;
  ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644
index 000000000..d3bade9e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALAveragePool2d.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
// Wires {input} as the kernel input and {output, scratchpad} as its outputs;
// slot 1 is backend scratch memory set up in configure().
AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
                             const Pool2DParams &params)
  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
{
}
+
+void AveragePool2D::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+ }
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Input Tensor Shape must be 4-D");
+ }
+ const Shape &input_shape = input()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+ else if (input()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ output()->resize({batches, output_height, output_width, depth});
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+ getTensorShape(input()), getTensorShape(output()));
+}
+
+void AveragePool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ case DataType::S8:
+ evalSInt8();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
// Float path: forwards stride/filter/padding plus the fused-activation
// clamp range to the TFLite reference AveragePool.
void AveragePool2D::evalFloat() const
{
  float activation_min{};
  float activation_max{};
  calculateActivationRange(_params.activation, &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height; // computed in configure()
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.float_activation_min = activation_min;
  params.float_activation_max = activation_max;

  tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()),
                                     getTensorShape(output()), getTensorData<float>(output()));
}
+
// uint8 quantized path: like evalFloat but with the activation range
// expressed in quantized units of the output tensor.
void AveragePool2D::evalQuantized() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  tflite::reference_ops::AveragePool(params, getTensorShape(input()),
                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
                                     getTensorData<uint8_t>(output()));
}
+
// int8 quantized path: uses the PAL AveragePool, which may consume the
// scratchpad tensor prepared in configure() (backend-dependent).
void AveragePool2D::evalSInt8() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  // Fetch scratch memory only when the PAL actually allocated some.
  auto scratchpad = getOutputTensors()[1];
  int8_t *scratchpad_data = nullptr;
  if (scratchpad->is_allocatable())
    scratchpad_data = scratchpad->data<int8_t>();

  luci_interpreter_pal::AveragePool<int8_t>(
    params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
+
// int16 quantized path: delegates to the TFLite reference integer-ops
// AveragePool (symmetric quantization enforced in configure()).
void AveragePool2D::evalSInt16() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  tflite::reference_integer_ops::AveragePool(
    params, getTensorShape(input()), getTensorData<int16_t>(input()), //
    getTensorShape(output()), getTensorData<int16_t>(output()));
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h
new file mode 100644
index 000000000..2c8fe16e7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing CircleAveragePool2D. The extra scratchpad tensor
+// (registered as a second output) backs PAL implementations that need
+// temporary storage — see the evalSInt8 path in the .cpp file.
+class AveragePool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalSInt16() const;
+ void evalSInt8() const;
+
+private:
+ // Padding computed during configure() from params and tensor shapes.
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
new file mode 100644
index 000000000..478bfa68e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager for each test case.
+class AveragePool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Float path: 2x3 filter, strides (1,2), VALID padding; RELU6 clamps negatives.
+TEST_F(AveragePool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+// U8 path: negative averages clamp to 0 under RELU6.
+TEST_F(AveragePool2DTest, Uint8_0)
+{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
+}
+
+// U8 path with all-positive input; averages pass through the activation.
+TEST_F(AveragePool2DTest, Uint8_1)
+{
+ std::vector<float> input_data{
+ 0, 6, 12, 4, //
+ 3, 2, 10, 7, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
+}
+
+// S16 symmetric-quantized path; same geometry as the Float test.
+TEST_F(AveragePool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// S8 path — exercises the scratchpad-backed PAL kernel.
+TEST_F(AveragePool2DTest, SInt8)
+{
+ Shape input_shape{1, 4, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5,
+ 7, -6, -1, 9, -2, 0, -5, 11, -1, -7};
+ std::vector<float> ref_output_data{
+ 0, 2.5, //
+ 1, 1.5, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// Rank-3 input (pooling requires rank 4) must be rejected at configure().
+TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 3, 5};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Input/output element-type mismatch must be rejected at configure().
+TEST_F(AveragePool2DTest, In_Out_Type_NEG)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// U8 average pooling requires identical input/output quantization params.
+TEST_F(AveragePool2DTest, Quant_Param_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+
+ std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+// Returns a copy of `shape` with its last two dimensions exchanged;
+// leading (batch) dimensions are left untouched.
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+ tflite::RuntimeShape swapped_shape(shape);
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// x_tmp/y_tmp are scratchpad tensors, registered as extra outputs so the
+// memory planner allocates them; execute() stores transposed operand
+// copies there when the adjoint flags require it.
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+// Validates operand types/ranks (float32, rank 2..4), prepares the PAL
+// transpose scratchpads, checks batch-dimension broadcasting and the
+// shared accumulation dimension, then resizes the output tensor.
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ // Left-pad both shapes with 1s to a common rank for broadcasting checks.
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
+// Transposes the last two dimensions of `tensor_in` into `tensor_out`,
+// keeping any leading batch dimensions as-is. Only FLOAT32 is supported;
+// other types throw.
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount();
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i;
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+ // Fixed typo in the user-facing error message ("suppport" -> "support").
+ throw std::runtime_error("Only support fp32 BatchMatMul for now.");
+ }
+}
+
+// Runs the batched matrix multiply for float32 operands.
+void BatchMatMul::execute() const
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
+ // NOTE(review): the PAL kernel takes RHS first and appears to expect
+ // operands pre-transposed in the opposite sense of adj_x/adj_y (the
+ // non-adjoint operand is transposed into the scratchpad and its shape
+ // row/column-swapped) — mirrors TFLite's BatchMatMul convention; confirm
+ // against PALBatchMatMul.
+ auto rhs_tensor = adj_y ? rhs : temp_rhs();
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing CircleBatchMatMul for float32 tensors of rank 2..4.
+// The two extra "output" tensors are scratchpads used by execute() to hold
+// transposed copies of the operands.
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ // Scratchpad accessors (allocated by the memory planner as outputs 1/2).
+ Tensor *temp_lhs() const { return _outputs[1]; }
+ Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager for each test case.
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// (1,2,3) x (1,3,4) -> (1,2,4), no adjoints.
+TEST_F(BatchMatMulTest, Float)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// RHS given pre-transposed as (1,4,3) with adj_y=true; same result as Float.
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// LHS given pre-transposed as (1,3,2) with adj_x=true; same result as Float.
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// Two independent batches multiplied in one call.
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+// Batch-dim broadcasting: (2,1,6) x (1,6,4) -> (2,1,4).
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+// Incompatible accumulation dims (2 vs 3) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Non-broadcastable batch dims (2 vs 3) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Rank-1 LHS (below minimum rank 2) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Rank-5 LHS (above maximum rank 4) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Mismatched operand element types (U8 vs FLOAT32) must be rejected.
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644
index 000000000..bd315ff7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+// Constructs the kernel with inputs {data, block_shape, crops} and one output.
+// No validation happens here; all checks are deferred to configure().
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+// Validates operand shapes and computes the output shape.
+// NOTE(review): block_shape/crops data pointers are dereferenced here, so both
+// must be constant tensors with storage available at configure() time — confirm
+// against the runtime's kernel-configuration contract.
+void BatchToSpaceND::configure()
+{
+
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ // Only 3D and 4D inputs are supported (kInputMinDimensionNum = 3,
+ // kInputMaxDimensionNum = 4), and input/output element types must match.
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ // Dimensions other than the leading batch and trailing channel are spatial.
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ // block_shape is a 1-D vector with one entry per spatial dimension.
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ // crops is a [spatial_dims_num x 2] matrix of non-negative (begin, end) crops.
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
+ // The input batch must divide evenly by the product of block_shape entries;
+ // each spatial dim is expanded by its block factor and then cropped.
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ // Batch shrinks by the block product; the channel dimension is unchanged.
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+// Dispatches to the platform abstraction layer (PAL) implementation by element
+// type. Only FLOAT32 and U8 are supported; anything else throws.
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
new file mode 100644
index 000000000..57703ea5d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the BATCH_TO_SPACE_ND operation: moves data from the
+// batch dimension back into the spatial dimensions, with optional cropping.
+// Inputs: data tensor, 1-D block_shape (S32), [N x 2] crops (S32).
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ // Accessors map onto the fixed input/output slots set by the constructor.
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
new file mode 100644
index 000000000..52647a763
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test helper: builds the three input tensors, runs configure/execute on the
+// BatchToSpaceND kernel and compares output data and shape with expectations.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor crops_tensor =
+ makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ // Output memory is allocated only after configure() has resized the tensor.
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Typed fixture so the Simple test below runs once per supported element type.
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
+
+// Positive case: 4 batches of 2x2 recombined into a single 4x4 image
+// (block_shape {2, 2}, no cropping).
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+// Negative case: batch size 3 is not divisible by the block product (2*2 = 4),
+// so configure() must throw.
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: crops contain a negative entry (-1); configure() must reject
+// negative crop values.
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
new file mode 100644
index 000000000..2d2842a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+// Applies the element-wise binary op `op` over two inputs, broadcasting per
+// NumPy-style rules up to N (default 5) dimensions. When the shapes are
+// identical the broadcast machinery is skipped and a flat loop is used.
+template <typename T, typename Op, int N = 5>
+void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape,
+ const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape,
+ const T *input2_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data,
+ Op op)
+{
+ if (unextended_input1_shape == unextended_input2_shape)
+ {
+ // Fast path: shapes match exactly, so every index maps one-to-one.
+ const int flat_size = tflite::MatchingElementsSize(
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = op(input1_data[i], input2_data[i]);
+ }
+ }
+ else
+ {
+ // Slow path: ranks must fit in the fixed-size N-dimensional descriptors.
+ assert(unextended_input1_shape.DimensionsCount() <= N);
+ assert(unextended_input2_shape.DimensionsCount() <= N);
+ assert(unextended_output_shape.DimensionsCount() <= N);
+
+ tflite::NdArrayDesc<N> desc1{};
+ tflite::NdArrayDesc<N> desc2{};
+ tflite::NdArrayDesc<N> output_desc{};
+ // Build stride descriptors that realize the broadcast for both inputs.
+ tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
+ &desc1, &desc2);
+ tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
+ &output_desc);
+
+ // Visit every output coordinate; each input index is recomputed through its
+ // (possibly zero-stride) descriptor to realize broadcasting.
+ auto fn = [&](int indexes[N]) {
+ output_data[SubscriptToIndex(output_desc, indexes)] =
+ op(input1_data[SubscriptToIndex(desc1, indexes)],
+ input2_data[SubscriptToIndex(desc2, indexes)]);
+ };
+ tflite::NDOpsHelper<N>(output_desc, fn);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644
index 000000000..9f4ba0e0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Base sources for the kernels library: shared helpers plus the test memory
+# managers (compiled in so kernels can be exercised by unit tests).
+set(SOURCES
+ BinaryOpCommon.h
+ Utils.h
+ Utils.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+# First pass: REGISTER_KERNEL collects <Kernel>.h/.cpp for every kernel listed
+# in ${KERNEL_REGISTER_FILE}.
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "${NODE}.h")
+ list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# Second pass: REGISTER_KERNEL is intentionally redefined so that re-including
+# the same registration file now collects <Kernel>.test.cpp files instead.
+macro(REGISTER_KERNEL NODE)
+ list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644
index 000000000..39ee725dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+#include <algorithm> // std::transform (used by cast_data)
+#include <stdexcept> // std::runtime_error (thrown for unsupported types)
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+// Element-wise cast of a contiguous buffer from InT to OutT via static_cast.
+// NOTE(review): relies on std::transform, which needs <algorithm>; this file
+// only includes kernels/Cast.h and kernels/Utils.h directly — confirm the
+// header is pulled in transitively or add the include.
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+ std::transform(in_data, in_data + elements_count, out_data,
+ [](InT a) { return static_cast<OutT>(a); });
+}
+
+// Second half of the double dispatch: the input's element type is already
+// resolved to InT; switch on the output tensor's element type and cast.
+// Throws for output types outside the supported set.
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+ auto const out_type = out_tensor->element_type();
+ auto const elements_count = out_tensor->shape().num_elements();
+
+ switch (out_type)
+ {
+ case loco::DataType::U8:
+ cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U16:
+ cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U32:
+ cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U64:
+ cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S8:
+ cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S16:
+ cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S32:
+ cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S64:
+ cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+ break;
+ case loco::DataType::BOOL:
+ cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type.");
+ }
+}
+
+// First half of the double dispatch: switch on the input tensor's element type
+// to obtain a typed pointer, then delegate to cast_from_pointer_to_tensor to
+// resolve the output type. Throws for unsupported input types.
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+ auto in_type = in_tensor->element_type();
+
+ switch (in_type)
+ {
+ case loco::DataType::U8:
+ cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U16:
+ cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U32:
+ cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U64:
+ cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S8:
+ cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S16:
+ cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S32:
+ cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S64:
+ cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::BOOL:
+ cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Single-input, single-output kernel; no parameters beyond the tensor pair.
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Rejects tensors with an Unknown element type and makes the output the same
+// shape as the input (element-type support is fully checked in execute()).
+void Cast::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
+ LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
+
+ const Shape &shape = input()->shape();
+ output()->resize(shape);
+}
+
+// Performs the cast; the element counts must agree because configure() copied
+// the input shape to the output.
+void Cast::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
new file mode 100644
index 000000000..f0bd02037
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CAST_H
+#define LUCI_INTERPRETER_KERNELS_CAST_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the CAST operation: converts a tensor's elements to the
+// output tensor's element type, preserving the shape.
+class Cast : public Kernel
+{
+public:
+ Cast(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CAST_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
new file mode 100644
index 000000000..4713ad34c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test helper: casts input_data (type T1) to T2 through the Cast kernel and
+// verifies the resulting data and shape.
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
+ std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = getElementType<T1>();
+ constexpr DataType output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ // Output storage exists only after configure() has resized the tensor.
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+// Variant of Check for BOOL input: the bool initializer list is first copied
+// into a vector of the BOOL tensor's backing storage type
+// (DataTypeImpl<BOOL>::Type) before being handed to makeInputTensor.
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+ std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = loco::DataType::BOOL;
+ constexpr DataType output_type = getElementType<T>();
+ std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+ for (auto elem : input_data)
+ {
+ input_data_converted.push_back(elem);
+ }
+
+ Tensor input_tensor =
+ makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+// Typed fixture: the TYPED_TEST cases below run once per integer type listed
+// in IntDataTypes.
+template <typename T> class CastTest : public ::testing::Test
+{
+};
+
+using IntDataTypes =
+ ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
+
+// Float -> integer TypeParam: values chosen to be exactly representable so the
+// expected integers are unambiguous.
+TYPED_TEST(CastTest, FloatToInt)
+{
+ Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+// Integer TypeParam -> float.
+TYPED_TEST(CastTest, IntToFloat)
+{
+ Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ });
+ SUCCEED();
+}
+
+// Helper shared by IntToInt: casts a small value set from T1 to T2; the values
+// fit in every integer type used, so no truncation occurs.
+template <typename T1, typename T2> void check_int()
+{
+ Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+// Integer TypeParam -> every other supported integer type.
+TYPED_TEST(CastTest, IntToInt)
+{
+ check_int<TypeParam, uint8_t>();
+ check_int<TypeParam, uint16_t>();
+ check_int<TypeParam, uint32_t>();
+ check_int<TypeParam, uint64_t>();
+ check_int<TypeParam, int8_t>();
+ check_int<TypeParam, int16_t>();
+ check_int<TypeParam, int32_t>();
+ check_int<TypeParam, int64_t>();
+ SUCCEED();
+}
+
+// Integer TypeParam -> bool: non-zero maps to true, zero maps to false.
+TYPED_TEST(CastTest, IntToBool)
+{
+ Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 0, 7, 0, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+// bool -> integer TypeParam: true maps to 1, false to 0.
+TYPED_TEST(CastTest, BoolToInt)
+{
+ CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1, 0, 0, 1, //
+ });
+ SUCCEED();
+}
+
+// float -> bool: non-zero maps to true, zero maps to false.
+TEST(CastTest, FloatToBool)
+{
+ Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+// bool -> float: true maps to 1.0f, false to 0.0f.
+TEST(CastTest, BoolToFloat)
+{
+ CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 0.0f, 1.0f, //
+ });
+ SUCCEED();
+}
+
+// Identity cast float -> float leaves values untouched.
+TEST(CastTest, FloatToFloat)
+{
+ Check<float, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ });
+ SUCCEED();
+}
+
+// Identity cast bool -> bool leaves values untouched.
+TEST(CastTest, BoolToBool)
+{
+ CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, true, false, false, //
+ },
+ /*output_data=*/
+ {
+ true, true, false, false, //
+ });
+ SUCCEED();
+}
+
+// Negative case: an output tensor of DataType::Unknown must be rejected by
+// Cast::configure().
+TEST(CastTest, UnsupportedType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::Unknown);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644
index 000000000..46ee5941e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Variadic-input kernel: concatenates all `inputs` along params.axis into one
+// output tensor. Validation is deferred to configure().
+Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
+ const ConcatenationParams &params)
+ : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
+{
+}
+
+// Validates that all inputs agree in type and in every dimension except the
+// concatenation axis, then resizes the output so that its axis dimension is
+// the sum of the inputs' axis dimensions.
+void Concatenation::configure()
+{
+ const int num_inputs = _inputs.size();
+ LUCI_INTERPRETER_CHECK(num_inputs > 0);
+ const Tensor *t0 = _inputs[0];
+
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
+
+ // Negative axis counts from the back, as in TFLite.
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += t0->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims());
+
+ // Accumulate the axis extent while checking all other dims match t0.
+ int32_t sum_axis = t0->shape().dim(axis);
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ if (d == axis)
+ {
+ sum_axis += tensor->shape().dim(axis);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+ }
+
+ Shape output_shape = t0->shape();
+ output_shape.dim(axis) = sum_axis;
+
+ // If input tensors are INT8 type then quantization parameters of all input tensors and the output
+ // should be the same
+ for (auto current_tensor : _inputs)
+ {
+ if (current_tensor->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+ output()->quantized_dimension());
+
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+ current_tensor->scales().size());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+ LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+ }
+ }
+ output()->resize(output_shape);
+}
+
+// Dispatches by element type. U8 goes through the quantized path (per-tensor
+// rescaling); all other supported types use the generic byte-copy path.
+void Concatenation::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+// Generic concatenation: gathers all input shapes/data pointers and delegates
+// to the TFLite reference implementation. Axis is re-normalized here because
+// configure()'s local normalization is not stored.
+template <typename T> void Concatenation::evalGeneric() const
+{
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += output()->shape().num_dims();
+
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::ConcatenationParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(),
+ getTensorShape(output()), getTensorData<T>(output()));
+}
+
+// Quantized (U8) concatenation: also passes per-input zero points/scales plus
+// the output quantization so the reference op can rescale values whose input
+// quantization differs from the output's.
+void Concatenation::evalQuantized() const
+{
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += output()->shape().num_dims();
+
+ VectorOfQuantizedTensors<true> inputs(_inputs);
+ tflite::ConcatenationParams params{};
+ params.axis = axis;
+ params.input_zeropoint = inputs.zero_point();
+ params.input_scale = inputs.scale();
+ params.inputs_count = _inputs.size();
+ params.output_zeropoint = output()->zero_point();
+ params.output_scale = output()->scale();
+
+ tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(),
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
new file mode 100644
index 000000000..b48c8ed1e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel joining N input tensors along a given axis into a single output
+// tensor. Inputs must share element type and all non-axis dimensions.
+class Concatenation : public KernelWithParams<ConcatenationParams>
+{
+public:
+  Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
+                const ConcatenationParams &params);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input(int index) const { return _inputs[index]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Typed concatenation without requantization.
+  template <typename T> void evalGeneric() const;
+  // U8 concatenation with per-input requantization to the output params.
+  void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
new file mode 100644
index 000000000..f893b38fd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: creates a fresh TestMemoryManager before each test case so
+// tensor allocations are isolated between tests.
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Float concatenation of two 2x3 inputs along every valid axis; a negative
+// axis must produce the same result as its positive counterpart.
+TEST_F(ConcatenationTest, Float)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  // Try different 'axis' and expect different results.
+  {
+    params.axis = 0;
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    for (auto t : kernel.getOutputTensors())
+    {
+      _memory_manager->allocate_memory(*t);
+    }
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+  }
+  {
+    params.axis = -2; // Same as '0'.
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+  }
+  {
+    params.axis = 1;
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    // Axis 1 interleaves the rows of the two inputs.
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+  }
+  {
+    params.axis = -1; // Same as '1'.
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+  }
+}
+
+// configure() must reject a kernel constructed with zero input tensors.
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
+{
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Axis -3 is out of range for rank-2 inputs and must be rejected.
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -3;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs with different element types (FLOAT32 vs U8) must be rejected.
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs with different ranks (2-D vs 3-D) must be rejected.
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs differing in a non-axis dimension (2x3 vs 3x3, axis = -1) must be
+// rejected.
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Mixed U8/S8 inputs must be rejected even though both are 8-bit quantized.
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
+{
+  std::vector<uint8_t> input1_data{1, 2, 3, 4};
+  std::vector<int8_t> input2_data{5, 6, 7, 8};
+  Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// For S8 inputs, per-channel input quantization params that differ from the
+// (per-tensor) output quantization params must be rejected.
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  int quantized_dimension = 3;
+  std::vector<float> scales{0.1, 0.2, 0.3};
+  std::vector<int32_t> zero_points{1, -1, 1};
+
+  Tensor input1_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// An S8 input whose zero-point differs from the output's must be rejected.
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4};
+  std::vector<float> input2_data{5, 6, 7, 8};
+  float scale = 0.1;
+  int32_t zero_point_1 = 1;
+  int32_t zero_point_2 = -1;
+
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Fused activations are not yet implemented for Concatenation, so configure()
+// must throw for any activation other than NONE.
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = 1;
+  params.activation = luci::FusedActFunc::RELU;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
new file mode 100644
index 000000000..234f95425
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALConv2d.h"
+
+#include <stdexcept>
+#include <thread>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Conv2D kernel. 'scratchpad' is registered as a second output tensor; the
+// PAL backend may request it as workspace (see SetupScratchpadTensor in
+// configure()).
+Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+               Tensor *scratchpad, const Conv2DParams &params)
+  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+// Validates type combinations, shapes and fused activation, computes padding,
+// resizes the output, and asks the PAL layer to size the scratchpad tensor.
+void Conv2D::configure()
+{
+  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (1) | float float  float float  |
+  // (2) | float int8   float float  | hybrid
+  // (3) | uint8 uint8  int32 uint8  | quantized
+  // (4) | int8  int8   int32 int8   | quantized per channel
+  //
+  // We only support (1), (3) and (4) for now, and additionally the following:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (5) | int16 int16  int64 int16  |
+  //
+  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
+  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+  {
+    // Per-channel quantization: one scale per output channel and all filter
+    // zero-points must be zero (symmetric weights).
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+    LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                           static_cast<size_t>(filter()->shape().dim(0)));
+    for (auto zerop : filter()->zero_points())
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+  }
+  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+  // Input/output are NHWC; filter is OHWI (dim 0 = output channels).
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
+
+  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+                                               bias()->shape().dim(0) == output_depth));
+
+  const int32_t output_height =
+    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+                      _params.dilation_height_factor);
+  const int32_t output_width =
+    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+                      _params.dilation_width_factor);
+
+  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+                                   input_height, filter_height, output_height);
+  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+                                  filter_width, output_width);
+
+  output()->resize({batches, output_height, output_width, output_depth});
+
+  // Allocate tensor for scratchpad, if needed.
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
+
+  // Reject fused activations the eval paths cannot clamp for.
+  switch (_params.activation)
+  {
+    case Activation::NONE:
+    case Activation::RELU:
+    case Activation::RELU6:
+    case Activation::RELU_N1_TO_1:
+      break;
+    default:
+      throw std::runtime_error("Unsupported fused activation");
+  }
+}
+
+// Dispatches to the eval path matching the input element type; for U8 the
+// number of filter scales selects per-tensor vs per-channel quantization.
+void Conv2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      if (filter()->element_type() == DataType::FLOAT32)
+      {
+        evalFloat();
+        break;
+      }
+      throw std::runtime_error("Unsupported type.");
+    case DataType::U8:
+      // NOTE(review): if filter()->scales() is empty, neither branch runs and
+      // this case silently falls through without computing anything — confirm
+      // configure() makes an empty scales vector impossible for U8.
+      if (filter()->scales().size() == 1)
+      {
+        evalQuantized();
+      }
+      else if (filter()->scales().size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                               static_cast<size_t>(filter()->shape().dim(0)));
+        evalQuantizedPerChannel();
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float convolution via the PAL backend; the fused activation is folded into
+// the [activation_min, activation_max] clamp range.
+void Conv2D::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  // Pass scratchpad data only when the backend actually allocated workspace.
+  auto scratchpad = getOutputTensors()[1];
+  float *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<float>();
+
+  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+                             getTensorShape(filter()), getTensorData<float>(filter()),
+                             getTensorShape(bias()), getTensorData<float>(bias()),
+                             getTensorShape(output()), getTensorData<float>(output()),
+                             getTensorShape(scratchpad), scratchpad_data);
+}
+
+// Per-tensor quantized (U8) convolution: the combined rescale factor
+// input_scale * filter_scale / output_scale is converted to a fixed-point
+// multiplier + shift for the PAL kernel.
+void Conv2D::evalQuantized() const
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto filter_scale = static_cast<double>(filter()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const double real_multiplier = input_scale * filter_scale / output_scale;
+  int32_t output_multiplier{};
+  int output_shift{};
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point();    // Note the '-'.
+  params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                             getTensorShape(filter()), getTensorData<uint8_t>(filter()),
+                             getTensorShape(bias()), getTensorData<int32_t>(bias()),
+                             getTensorShape(output()), getTensorData<uint8_t>(output()),
+                             getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
+}
+
+// Per-channel quantized (U8) convolution, computed with a direct nested-loop
+// reference implementation: each output channel has its own requantization
+// multiplier/shift derived from the filter's per-channel scales.
+void Conv2D::evalQuantizedPerChannel() const
+{
+  const auto *input_data = getTensorData<uint8_t>(input());
+  const auto *filter_data = getTensorData<uint8_t>(filter());
+  const auto *bias_data = getTensorData<int32_t>(bias());
+  auto *output_data = getTensorData<uint8_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  // One effective rescale factor per output channel, converted to
+  // fixed-point multiplier/shift pairs.
+  const std::vector<double> effective_output_scale =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  const std::vector<ChannelQuantMultipliers> multipliers_raw =
+    quantizeMultipliers(effective_output_scale);
+  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const int32_t in_y_origin = out_y * stride_height - _padding_height;
+          const int32_t in_x_origin = out_x * stride_width - _padding_width;
+          int32_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              // Skip taps that fall into the (implicit zero) padding region.
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const uint8_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const uint8_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  // Accumulate in zero-point-corrected int32.
+                  acc += static_cast<int32_t>(input_val - input()->zero_point()) *
+                         static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          // Requantize with this channel's multiplier, add the output
+          // zero-point, then clamp to the fused-activation range.
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+          scaled_acc += output()->zero_point();
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+// Per-channel quantized S8 convolution via the PAL backend: per-channel
+// multiplier/shift arrays are passed alongside the usual ConvParams.
+void Conv2D::evalQuantizedS8PerChannel() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects filter zero points to be negated.
+  params.input_offset = -input()->zero_point(); // Note the '-'.
+  params.weights_offset = 0;                    // Unused in tflite code
+  params.output_offset = output()->zero_point();
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers =
+    quantizeMultipliers(effective_output_scales);
+
+  // Split the (multiplier, shift) pairs into the two flat arrays the PAL
+  // ConvPerChannel entry point expects.
+  std::vector<int32_t> shifts;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](ChannelQuantMultipliers cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::ConvPerChannel(
+    params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+// S16 quantized convolution (symmetric: zero-points are not subtracted),
+// computed with a direct nested-loop reference implementation using an int64
+// accumulator and per-channel requantization multipliers.
+void Conv2D::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  const auto *filter_data = getTensorData<int16_t>(filter());
+  const auto *bias_data = getTensorData<int64_t>(bias());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  // One effective rescale factor per output channel (broadcast if per-tensor).
+  const std::vector<double> effective_output_scale =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  const std::vector<ChannelQuantMultipliers> multipliers_raw =
+    quantizeMultipliers(effective_output_scale);
+  BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const int32_t in_y_origin = out_y * stride_height - _padding_height;
+          const int32_t in_x_origin = out_x * stride_width - _padding_width;
+          // int64 accumulator avoids overflow of int16*int16 sums.
+          int64_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              // Skip taps that fall into the (implicit zero) padding region.
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const int16_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          // Requantize with this channel's multiplier, then clamp to the
+          // fused-activation range (no output zero-point for S16).
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
new file mode 100644
index 000000000..330bf3a2a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H
+#define LUCI_INTERPRETER_KERNELS_CONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// 2-D convolution kernel (NHWC input/output, OHWI filter) supporting float,
+// U8/S8 quantized (per-tensor and per-channel) and S16 quantized paths.
+class Conv2D : public KernelWithParams<Conv2DParams>
+{
+public:
+  Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+         Tensor *scratchpad, const Conv2DParams &params);
+
+  // Accessors for the kernel's I/O tensors; bias may be nullptr.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *filter() const { return _inputs[1]; }
+  const Tensor *bias() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One eval path per supported type combination; see execute() dispatch.
+  void evalFloat() const;
+  void evalQuantized() const;
+  void evalQuantizedPerChannel() const;
+  void evalQuantizedS8PerChannel() const;
+  void evalQuantizedS16() const;
+
+private:
+  // Explicit padding (per side) computed in configure() from the padding
+  // scheme, strides and dilation.
+  int32_t _padding_height{};
+  int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
new file mode 100644
index 000000000..0fe6ef795
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Common fixture: creates a fresh TestMemoryManager for every test case so
+// tensor allocations do not leak state between tests.
+class Conv2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
+{
+  // Float path: VALID padding, stride (2, 1), dilation 1, fused RELU.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  const Shape in_shape{1, 4, 3, 2};
+  const Shape kernel_shape{2, 2, 2, 2};
+  const Shape bias_shape{2};
+  const std::vector<float> in_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  const std::vector<float> kernel_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  const std::vector<float> bias_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
+  _memory_manager->allocate_memory(out_tensor);
+  kernel.execute();
+
+  const std::vector<float> expected_data{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+  const std::vector<int32_t> expected_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+TEST_F(Conv2DTest, FloatPointwise)
+{
+  // 1x1 ("pointwise") convolution with unit strides and fused RELU.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  const Shape in_shape{1, 2, 2, 2};
+  const Shape kernel_shape{2, 1, 1, 2};
+  const Shape bias_shape{2};
+  const std::vector<float> in_data{
+    1, 2, // row = 0, col = 0
+    3, 4, // row = 0, col = 1
+    5, 6, // row = 1, col = 0
+    7, 8, // row = 1, col = 1
+  };
+  const std::vector<float> kernel_data{
+    -1, 2, // out = 0
+    -3, 4, // out = 1
+  };
+  const std::vector<float> bias_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
+  _memory_manager->allocate_memory(out_tensor);
+  kernel.execute();
+
+  const std::vector<float> expected_data{
+    4, 7,  6,  9,  // row = 0
+    8, 11, 10, 13, // row = 1
+  };
+  const std::vector<int32_t> expected_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
+{
+  // Two batches, three 2x2 filters, stride 2, no fused activation.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  const Shape in_shape{2, 2, 4, 1};
+  const Shape kernel_shape{3, 2, 2, 1};
+  const Shape bias_shape{3};
+  const std::vector<float> in_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  const std::vector<float> kernel_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  const std::vector<float> bias_values{1, 2, 3};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(scratchpad);
+  kernel.execute();
+
+  const std::vector<float> expected_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  const std::vector<int32_t> expected_shape{2, 1, 2, 3};
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// Per-tensor asymmetric U8 quantization; same data and expected values as
+// FloatCheck — the dequantized output must match the float reference.
+TEST_F(Conv2DTest, Uint8)
+{
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+  // The filter reuses the input's quantization parameters, hence the S32 bias
+  // scale below is input_scale * input_scale.
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+                                  filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(
+    {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Channel-wise (per-output-channel) U8 quantization of the filter: channel c
+// uses filter_quant_params[c], and the matching per-channel bias scale is
+// filter_scale[c] * input_scale. Expected values equal the float reference.
+TEST_F(Conv2DTest, Uint8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+  // One (scale, zero_point) pair per output channel.
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
+  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  // Bias is quantized symmetrically (zero point 0) with per-channel scales.
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Channel-wise S8 quantization with explicit power-of-two per-channel filter
+// scales (0.5, 0.25, 0.125) and zero points of 0; per-channel bias scale is
+// filter_scale[c] * input_scale.
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+  // Explicit symmetric per-channel filter parameters (scale, zero_point = 0).
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  // Bias uses per-channel scales derived from filter and input scales.
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Per-tensor symmetric 16-bit quantization (all zero points 0) with an S64
+// bias whose scale is input_scale * filter_scale (0.25 * 0.2). Same data and
+// expected values as the Float test.
+TEST_F(Conv2DTest, SInt16)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  std::vector<float> ref_output_data{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// Symmetric 16-bit activations with channel-wise quantized weights: output
+// channel c uses filter_scales[c], and the per-channel S64 bias scale is
+// filter_scales[c] * input_scale.
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
+{
+  Shape input_shape{1, 2, 2, 2};  // Batch x H x W x C
+  Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
+  Shape bias_shape{3};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
+
+  std::vector<float> input_data{
+    1, 2, // row = 0, col 0
+    3, 4, // row = 0, col 1
+    5, 6, // row = 1, col 0
+    7, 8, // row = 1, col 1
+  };
+  std::vector<float> filter_data{
+    4, -3, // out = 0
+    1, -3, // out = 1
+    5, -3, // out = 2
+  };
+  std::vector<float> bias_data{1, 10, 5};
+  std::vector<float> ref_output_data{
+    0, 5, 4,  // row 0, col 0
+    1, 1, 8,  // row 0, col 1
+    3, 0, 12, // row 1, col 0
+    5, 0, 16, // row 1, col 1
+  };
+
+  float input_scale = 0.25f;
+  float output_scale = 0.05f;
+  std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
+  std::vector<float> bias_scales;
+  // Index with size_t to match std::vector::size() and avoid a
+  // signed/unsigned comparison warning.
+  for (size_t i = 0; i < filter_scales.size(); ++i)
+    bias_scales.push_back(filter_scales[i] * input_scale);
+  std::vector<int32_t> zerop = {0, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+                                                        filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+  // S32 input combined with a FLOAT32 filter is not a supported type
+  // combination, so configure() must throw.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  const Shape in_shape{1, 4, 3, 2};
+  const Shape kernel_shape{2, 2, 2, 2};
+  const Shape bias_shape{2};
+  const std::vector<int32_t> in_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  const std::vector<float> kernel_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  const std::vector<float> bias_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::S32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+  // U8 bias with FLOAT32 input/filter is invalid; configure() must throw.
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<uint8_t> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+  // Bias has 3 elements but the filter has only 2 output channels;
+  // configure() must throw.
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{3};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+  // Input depth (1) does not match the filter's input-channel dimension (2);
+  // configure() must throw.
+  Shape input_shape{1, 4, 6, 1};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+  // TANH is not a supported fused activation for this kernel;
+  // configure() must throw.
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::TANH;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
new file mode 100644
index 000000000..3a9acd1d4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers the single input and output tensor with the kernel base class.
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+  : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+// Validates the operands and computes the output shape:
+//   H_out = H_in * block_size, W_out = W_in * block_size,
+//   C_out = C_in / block_size^2.
+void DepthToSpace::configure()
+{
+  // Input must be rank-4 (NHWC); only FLOAT32 and U8 are supported, and
+  // input/output element types must match. NOTE: the original code was
+  // missing the terminating ';' on two of these checks.
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  const int block_size = params().block_size;
+  // Guard against division by zero / negative block sizes below.
+  LUCI_INTERPRETER_CHECK(block_size > 0);
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  const int32_t input_channels = input()->shape().dim(3);
+  int32_t output_height = input_height * block_size;
+  int32_t output_width = input_width * block_size;
+  int32_t output_channels = input_channels / block_size / block_size;
+
+  // The channel check rejects depths that are not divisible by block_size^2
+  // (the integer division above would have truncated in that case).
+  LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+  LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+  LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = output_channels;
+
+  output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+  // Dispatch to the platform abstraction layer (PAL) implementation based on
+  // the element type that configure() already validated.
+  tflite::DepthToSpaceParams op_params;
+  op_params.block_size = params().block_size;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+      break;
+    default:
+      // Defensive guard; configure() restricts types to the cases above.
+      throw std::runtime_error("Unsupported Type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
new file mode 100644
index 000000000..63ce37610
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Interpreter kernel for the DepthToSpace operation: moves data from the
+// channel dimension into spatial blocks of size params().block_size.
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+  DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
new file mode 100644
index 000000000..88e6e07f1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Typed test suite covering both element types DepthToSpace supports.
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
+
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+  // block_size 2 rearranges a 1x1x2x4 input into a 1x2x4x1 output.
+  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
+
+  const Shape in_shape{1, 1, 2, 4};
+  const std::vector<TypeParam> in_data{1, 2, 3, 4, 5, 6, 7, 8};
+  const std::vector<TypeParam> expected_data{1, 2, 5, 6, 3, 4, 7, 8};
+  const std::vector<int32_t> expected_shape{1, 2, 4, 1};
+
+  Tensor in_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(in_shape, in_data, mm.get());
+  Tensor out_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel(&in_tensor, &out_tensor, params);
+  kernel.configure();
+  mm->allocate_memory(out_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(out_tensor),
+              ::testing::ElementsAreArray(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+TEST(DepthToSpaceTest, InvalidInputShape_NEG)
+{
+  // A rank-3 input must be rejected by configure() (rank 4 is required).
+  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
+
+  const std::vector<float> in_data{1, 2, 3, 4, 5, 6, 7, 8};
+  const Shape in_shape{1, 2, 4};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, mm.get());
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel(&in_tensor, &out_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
+{
+  // FLOAT32 input with a U8 output must be rejected by configure().
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
+{
+  // 4 input channels are not divisible by block_size^2 (= 9), so
+  // configure() must throw.
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  DepthToSpaceParams params{};
+  params.block_size = 3;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..c554c309d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Registers the kernel tensors with the base class.
// Inputs: {input, filter, bias (may be nullptr)}; outputs: {output, scratchpad}.
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
                                 Tensor *output, Tensor *scratchpad,
                                 const DepthwiseConv2DParams &params)
  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
+
// Validates the dtype combination and tensor shapes, computes explicit
// paddings, resizes the output tensor, and lets the PAL size the scratchpad.
void DepthwiseConv2D::configure()
{
  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
  //     | input filter bias  output |
  // ----+---------------------------+
  // (1) | float float  float float  |
  // (2) | float int8   float float  | hybrid
  // (3) | uint8 uint8  int32 uint8  | quantized
  // (4) | int8  int8   int32 int8   | quantized per channel
  // (5) | int16 int8   int64 int16  | quantized per channel 16x8
  //
  // We only support (1), (3) and (4) for now, and additionally the following:
  //     | input filter bias  output |
  // ----+---------------------------+
  // (5) | int16 int16  int64 int16  |
  //
  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
  {
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
  }
  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
  {
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
  }
  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
  {
    // S8 requires per-channel symmetric quantization: one scale per output
    // channel (last filter dimension) and every zero point equal to zero.
    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
    LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
                           filter()->scales().size());
    for (auto zerop : filter()->zero_points())
    {
      LUCI_INTERPRETER_CHECK(zerop == 0);
    }
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
  }
  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
  {
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
  }
  else
  {
    throw std::runtime_error("Unsupported type.");
  }
  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  // Filter format: [1, H, W, O].
  LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t channels_out = filter_shape.dim(3);

  // Bias, when present, must be a vector with one element per output channel.
  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
                                               bias()->shape().dim(0) == channels_out));

  const int32_t output_height =
    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
                      _params.dilation_height_factor);
  const int32_t output_width =
    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
                      _params.dilation_width_factor);

  // Convert the padding mode into explicit per-axis pixel amounts; the eval
  // paths consume these directly.
  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
                                   input_height, filter_height, output_height);
  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
                                  filter_width, output_width);

  output()->resize({batches, output_height, output_width, channels_out});

  // Only the dilation factors are filled in here; the PAL uses them together
  // with the shapes to decide whether a scratchpad is needed and how large.
  tflite::DepthwiseParams params{};

  params.dilation_height_factor = _params.dilation_height_factor;
  params.dilation_width_factor = _params.dilation_width_factor;

  auto scratchpad = getOutputTensors()[1];
  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
                                              getTensorShape(input()), getTensorShape(filter()),
                                              getTensorShape(output()));
}
+
+void DepthwiseConv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+ throw std::runtime_error("Unsupported type.");
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(3)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void DepthwiseConv2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
// Hand-written U8 reference path used when the filter carries one scale per
// output channel (the stock TFLite U8 reference kernel handles only a single
// layer-wise scale).
void DepthwiseConv2D::evalQuantizedPerChannel() const
{
  const auto *input_data = getTensorData<uint8_t>(input());
  const auto *filter_data = getTensorData<uint8_t>(filter());
  const auto *bias_data = getTensorData<int32_t>(bias());
  auto *output_data = getTensorData<uint8_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  const Shape &output_shape = output()->shape();

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t input_depth = input_shape.dim(3);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t output_height = output_shape.dim(1);
  const int32_t output_width = output_shape.dim(2);

  const int32_t stride_height = _params.stride_height;
  const int32_t stride_width = _params.stride_width;
  const int32_t dilation_height_factor = _params.dilation_height_factor;
  const int32_t dilation_width_factor = _params.dilation_width_factor;
  const int32_t depth_multiplier = _params.depth_multiplier;

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  // One (multiplier, shift) pair per output channel; BroadcastableWrapper lets
  // a single layer-wise pair serve every channel index.
  const std::vector<double> effective_output_scales =
    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());

  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
    quantizeMultipliers(effective_output_scales);
  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);

  for (int batch = 0; batch < batches; ++batch)
  {
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel)
        {
          for (int m = 0; m < depth_multiplier; ++m)
          {
            // Each input channel feeds depth_multiplier output channels.
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - _padding_width;
            const int in_y_origin = (out_y * stride_height) - _padding_height;
            int32 acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y)
            {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x)
              {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y = in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
                if (is_point_inside_image)
                {
                  int32 input_val =
                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
                  int32 filter_val =
                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
                  // Accumulate with zero points removed (per-channel for the
                  // filter, single zero point for the input).
                  acc += (filter_val - filter()->zero_points()[output_channel]) *
                         (input_val - input()->zero_point());
                }
              }
            }
            if (bias_data)
            {
              acc += bias_data[output_channel];
            }
            // Requantize to the output scale and re-apply the output zero
            // point, then clamp to the fused-activation range.
            int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
            int output_shift = quant_multipliers[output_channel].shift;
            int32_t scaled_acc =
              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
            scaled_acc += output()->zero_point();
            scaled_acc = std::max(scaled_acc, activation_min);
            scaled_acc = std::min(scaled_acc, activation_max);
            output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
              static_cast<uint8_t>(scaled_acc);
          }
        }
      }
    }
  }
}
+
+void DepthwiseConv2D::evalQuantized() const
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto filter_scale = static_cast<double>(filter()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+
+ params.padding_type = tflite::PaddingType::kSame;
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = 1; // unused in tflite code
+ params.output_shift = 0; // unused in tflite code
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
// Hand-written S16 reference path with per-channel (or broadcast layer-wise)
// filter scales. Accumulation happens in int64 and the bias is int64.
// Input/filter zero points are not subtracted here, and no output zero point
// is added (the values are used as-is).
void DepthwiseConv2D::evalQuantizedS16() const
{
  const auto *input_data = getTensorData<int16_t>(input());
  const auto *filter_data = getTensorData<int16_t>(filter());
  const auto *bias_data = getTensorData<int64_t>(bias());
  auto *output_data = getTensorData<int16_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  const Shape &output_shape = output()->shape();

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t input_depth = input_shape.dim(3);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t output_height = output_shape.dim(1);
  const int32_t output_width = output_shape.dim(2);

  const int32_t stride_height = _params.stride_height;
  const int32_t stride_width = _params.stride_width;
  const int32_t dilation_height_factor = _params.dilation_height_factor;
  const int32_t dilation_width_factor = _params.dilation_width_factor;
  const int32_t depth_multiplier = _params.depth_multiplier;

  // Per-output-channel requantization parameters; BroadcastableWrapper lets a
  // single layer-wise pair serve every channel index.
  const std::vector<double> effective_output_scales =
    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());

  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
    quantizeMultipliers(effective_output_scales);

  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  for (int32_t batch = 0; batch < batches; ++batch)
  {
    for (int32_t out_y = 0; out_y < output_height; ++out_y)
    {
      for (int32_t out_x = 0; out_x < output_width; ++out_x)
      {
        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
        {
          for (int32_t m = 0; m < depth_multiplier; ++m)
          {
            // Each input channel feeds depth_multiplier output channels.
            const int32_t out_c = m + in_c * depth_multiplier;
            const int32_t in_y_origin = out_y * stride_height - _padding_height;
            const int32_t in_x_origin = out_x * stride_width - _padding_width;
            int64_t acc = 0;
            for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
            {
              for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
              {
                const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
                const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
                // Zero padding: taps outside the input are skipped.
                if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
                {
                  const int16_t input_val =
                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
                  const int16_t filter_val =
                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
                }
              }
            }
            if (bias_data != nullptr)
            {
              acc += bias_data[out_c];
            }

            // Requantize to the output scale, then clamp to the
            // fused-activation range.
            int32_t output_multiplier = quant_multipliers[out_c].multiplier;
            int output_shift = quant_multipliers[out_c].shift;
            int32_t scaled_acc =
              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);

            scaled_acc = std::max(scaled_acc, activation_min);
            scaled_acc = std::min(scaled_acc, activation_max);

            output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
          }
        }
      }
    }
  }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
new file mode 100644
index 000000000..3d1faf6c1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Depthwise 2D convolution kernel.
// Inputs: {input, filter, bias}; outputs: {output, scratchpad}, where the
// scratchpad is an implementation buffer set up during configure().
class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
{
public:
  DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
                  Tensor *scratchpad, const DepthwiseConv2DParams &params);

  // Accessors for the tensors registered with the base kernel.
  const Tensor *input() const { return _inputs[0]; }
  const Tensor *filter() const { return _inputs[1]; }
  const Tensor *bias() const { return _inputs[2]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // One evaluation routine per supported dtype/quantization combination.
  void evalFloat() const;
  void evalQuantized() const;
  void evalQuantizedPerChannel() const;
  void evalQuantizedS8PerChannel() const;
  void evalQuantizedS16() const;

private:
  // Explicit per-axis paddings computed in configure() from the padding mode.
  int32_t _padding_height{};
  int32_t _padding_width{};
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..6b4673f3e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Test fixture: provides a fresh TestMemoryManager for every test case.
class DepthwiseConv2DTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
TEST_F(DepthwiseConv2DTest, Float)
{
  // Float path: depth_multiplier 2 maps the 2 input channels onto 4 output
  // channels; RELU clamps negative accumulations to zero.
  Shape input_shape{1, 4, 2, 2};
  Shape filter_shape{1, 2, 2, 4};
  Shape bias_shape{4};
  std::vector<float> input_data{
    1,  2,  7,  8,  //
    3,  4,  9,  10, //
    5,  6,  11, 12, //
    13, 14, 15, 16, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
  Tensor filter_tensor =
    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
  Tensor bias_tensor =
    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 2;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::RELU;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(scratchpad);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{
    71,  0, 99,  0,  //
    167, 0, 227, 28, //
  };
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
+
TEST_F(DepthwiseConv2DTest, Uint8)
{
  // Layer-wise (single scale) U8 quantization; the filter reuses the input's
  // quantization parameters, so the bias scale is input_scale^2.
  std::vector<float> input_data{
    1, 2, 7,  8,  // column 1
    3, 4, 9,  10, // column 2
    5, 6, 11, 12, // column 3
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};

  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);

  Tensor input_tensor =
    makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
                                  input_data, _memory_manager.get());
  Tensor filter_tensor =
    makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
                                  filter_data, _memory_manager.get());
  // Bias scale = input_scale * filter_scale (identical here), zero point 0.
  Tensor bias_tensor = makeInputTensor<DataType::S32>(
    {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
  Tensor output_tensor =
    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 1;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::NONE;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  std::vector<float> ref_output_data{
    71, -34, 99,  -20, //
    91, -26, 127, -4,  //
  };
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
+
TEST_F(DepthwiseConv2DTest, SInt16)
{
  // S16 path with single (layer-wise) scales; bias is S64 and the scratchpad
  // dtype is S64 as well.
  Shape input_shape{1, 4, 2, 2};
  Shape filter_shape{1, 2, 2, 4};
  Shape bias_shape{4};
  std::vector<int32_t> ref_output_shape{1, 2, 1, 4};

  std::vector<float> input_data{
    1,  2,  7,  8,  //
    3,  4,  9,  10, //
    5,  6,  11, 12, //
    13, 14, 15, 16, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  std::vector<float> ref_output_data{
    71,  0, 99,  0,  //
    167, 0, 227, 28, //
  };

  Tensor input_tensor =
    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
  Tensor filter_tensor =
    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
  // Bias scale = input_scale * filter_scale.
  Tensor bias_tensor =
    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
  Tensor scratchpad(DataType::S64, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 2;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::RELU;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
{
  // S16 path with channel-wise (CWQ) filter scales: one scale per output
  // channel on filter axis 3, bias quantized per channel on axis 0.
  const int output_channels = 4;
  Shape input_shape{1, 4, 2, 2};
  Shape filter_shape{1, 2, 2, output_channels};
  Shape bias_shape{4};
  std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};

  std::vector<float> input_data{
    1,  2,  7,  8,  //
    3,  4,  9,  10, //
    5,  6,  11, 12, //
    13, 14, 15, 16, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  std::vector<float> ref_output_data{
    71,  0, 99,  0,  //
    167, 0, 227, 28, //
  };

  float input_scale = 0.25;
  std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f};
  // Per-channel bias scale = input_scale * per-channel filter scale.
  std::vector<float> bias_scales;
  for (int i = 0; i < output_channels; ++i)
    bias_scales.push_back(filter_scales[i] * input_scale);
  std::vector<int32_t> zerop(4, 0); // symmetric: all zero points are 0
  Tensor input_tensor =
    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
                                                        filter_data, _memory_manager.get());
  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
                                                      _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
  Tensor scratchpad(DataType::S16, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 2;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::RELU;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
{
  // U8 path with channel-wise filter quantization: per-channel (scale, zero
  // point) pairs on filter axis 3; exercises evalQuantizedPerChannel().
  const int output_channels = 4;
  Shape input_shape{1, 3, 2, 2};
  Shape filter_shape{1, 2, 2, output_channels};
  Shape bias_shape{4};
  std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};

  std::vector<float> input_data{
    1, 2, 7,  8,  //
    3, 4, 9,  10, //
    5, 6, 11, 12, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  std::vector<float> ref_output_data{
    71, -34, 99,  -20, //
    91, -26, 127, -4,  //
  };

  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);

  // Per-channel ranges chosen from each channel's min/max filter values.
  std::vector<std::pair<float, int32_t>> filter_quant_params;
  filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13));
  filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10));
  filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15));
  filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12));

  std::vector<float> filter_scales;
  std::vector<int32_t> filter_zerops;
  for (auto iter : filter_quant_params)
  {
    filter_scales.push_back(iter.first);
    filter_zerops.push_back(iter.second);
  }

  // Per-channel bias scale = per-channel filter scale * input scale.
  std::vector<float> bias_scales;
  for (int i = 0; i < output_channels; ++i)
    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
  std::vector<int32_t> zerop(output_channels, 0);

  Tensor input_tensor =
    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
                                  input_data, _memory_manager.get());
  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
                                                       3, filter_data, _memory_manager.get());
  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
                                                      _memory_manager.get());
  Tensor output_tensor =
    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
  Tensor scratchpad(DataType::U8, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 1;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::NONE;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  // Tolerance of one output quantization step.
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, output_quant_param.first));
}
+
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<int32_t> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{4, 2, 2};
+ Shape filter_shape{2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{2, 1, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 4, 2};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..96399e5c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
+ input()->element_type() == loco::DataType::U8 ||
+ input()->element_type() == loco::DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+
+ output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point();
+ op_params.scale = input()->scale();
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
new file mode 100644
index 000000000..dd1532278
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
+ : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+{
+}
+
+void Div::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Div::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Div::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Div::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Div::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
+
+ int32_t output_multiplier{};
+ int output_shift{};
+
+ quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.h b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h
new file mode 100644
index 000000000..c1bf3e10b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DIV_H
+#define LUCI_INTERPRETER_KERNELS_DIV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Div : public KernelWithParams<DivParams>
+{
+public:
+ Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp
new file mode 100644
index 000000000..85cd8b90a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+float GetTolerance(float min, float max)
+{
+ const float kQuantizedStep = (max - min) / 255.0f;
+ const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+ return kQuantizedTolerance;
+}
+
+TEST_F(DivTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 1};
+
+ std::vector<int32_t> output_shape = {2, 3, 1, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f};
+ std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST_F(DivTest, FloatBroadcast)
+{
+ Shape input1_shape = {1, 3};
+ Shape input2_shape = {3, 1};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f};
+ std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(DivTest, Uint8)
+{
+ Shape base_shape = {1, 2, 2, 1};
+
+ std::vector<int32_t> output_shape = {1, 2, 2, 1};
+
+ std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f};
+ std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f};
+ std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f};
+
+ const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
+
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get());
+
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(test_outputs, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, //
+ 10, 0, 4, 5, 20, 0, //
+ 0, 0, 0, 2, 0, 0, //
+ 2, 0, 1, 10, 5, 0, //
+ 2, 3, 1, 0, 5, 1, //
+ 18, 20, 7, 0, 37, 10},
+ {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+ {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0,
+ 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10},
+ {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+ std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+ std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(DivTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DivTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
new file mode 100644
index 000000000..697d63be4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/Utils.h"
+
+#include "PALElu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Single-input, single-output elementwise kernel.
+Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Elu::configure()
+{
+ // ELU preserves both the element type and the shape of its input.
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Elu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ // Delegate to the platform abstraction layer (PAL) implementation.
+ luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ // Only FLOAT32 is implemented for this kernel.
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h
new file mode 100644
index 000000000..c844ab57f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ELU_H
+#define LUCI_INTERPRETER_KERNELS_ELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Elementwise ELU activation kernel; FLOAT32 only (see Elu.cpp execute()).
+class Elu : public Kernel
+{
+public:
+ Elu(const Tensor *input, Tensor *output);
+
+ // Accessors for the single input / output tensor.
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp
new file mode 100644
index 000000000..814499cdb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Runs the Elu kernel over FLOAT32 input and verifies both the inferred
+// output shape and the output contents.
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ // output_shape used to be accepted but ignored ((void)output_shape);
+ // assert that configure() actually resized the output as expected.
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+}
+
+// ELU behaves as identity for x >= 0 and exp(x) - 1 for x < 0; the expected
+// values below follow that formula.
+TEST(EluTest, SimpleElu)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
+}
+
+// Negative test: configure() must reject an output element type (U8) that
+// differs from the FLOAT32 input.
+TEST(EluTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
new file mode 100644
index 000000000..a57e127b7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Two inputs (x, y) compared element-wise; one boolean output.
+Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Equal::configure()
+{
+ // Inputs must share an element type; the comparison result is always BOOL.
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ // Pre-compute fixed-point multiplier/shift pairs from the quantization
+ // scales so evalQuantized() can compare without floating-point math.
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ // NOTE(review): calculateShapeForBroadcast presumably throws on
+ // incompatible shapes — the *_Broadcast_NEG tests rely on that; confirm.
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Equal::execute() const
+{
+ // Dispatch on the (already validated) common input element type.
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Equal::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ // Unequal input shapes imply broadcasting; pick the matching TFLite
+ // reference implementation accordingly.
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+// Integer comparison uses the "NoScaling" variants: values are compared
+// directly, without the quantization rescaling applied in evalQuantized().
+template <typename T> void Equal::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+// Quantized (U8) comparison: both operands are rescaled with the
+// multiplier/shift pairs computed in configure() before comparing.
+void Equal::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h
new file mode 100644
index 000000000..c9be32cc0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Elementwise x == y comparison producing a BOOL tensor. Supports
+// broadcasting and FLOAT32 / S32 / S64 / quantized U8 inputs (see Equal.cpp).
+class Equal : public Kernel
+{
+public:
+ Equal(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ // Per-type evaluation paths selected in execute().
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ // Fixed-point rescaling parameters computed in configure() for U8 inputs.
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp
new file mode 100644
index 000000000..5870e5460
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager for every test case.
+class EqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Element-wise comparison with identical shapes (no broadcasting).
+TEST_F(EqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, true, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// Broadcasting comparison: y (1x3) is broadcast against every row of x (4x3).
+// (Test name fixed: was misspelled "FloatBroardcast".)
+TEST_F(EqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, false, false, // Row 2
+ false, false, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Same-shape integer comparison exercising the type's extreme values
+// (numeric_limits min/max) to catch overflow-sensitive comparisons.
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Integer comparison with broadcasting: y ({3}) is compared against each
+// row of x ({4, 3}).
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ false, true, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Runs both integer helpers for S32.
+TEST_F(EqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Runs both integer helpers for S64.
+TEST_F(EqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// Quantized comparison where x and y use *different* quantization params
+// (y's range is twice x's), exercising the rescaling path in evalQuantized().
+TEST_F(EqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, false, // Row 1
+ false, true, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// Quantized comparison with broadcasting (shared quantization params):
+// y (1x1x4x1) is broadcast against each row of x (1x4x4x1).
+TEST_F(EqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, false, false, // Row 1
+ false, false, true, false, // Row 2
+ false, false, false, false, // Row 3
+ true, true, true, true, // Row 4
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// Negative test: configure() must reject mismatched input element types.
+TEST_F(EqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: the output must be BOOL; FLOAT32 output is rejected.
+TEST_F(EqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative tests: shapes {2} and {3} are not broadcast-compatible, so
+// configure() must throw for every supported element type.
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
new file mode 100644
index 000000000..e7c560a88
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Single-input, single-output elementwise kernel.
+Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Exp::configure()
+{
+ // Output mirrors the input: same element type, same shape.
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Exp::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ // Only FLOAT32 is implemented for this kernel.
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Exp::evalFloat() const
+{
+ // MatchingFlatSize yields the flat element count shared by input and
+ // output (presumably asserting the shapes match — see TFLite docs).
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h
new file mode 100644
index 000000000..429177375
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXP_H
+#define LUCI_INTERPRETER_KERNELS_EXP_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Elementwise exponential kernel; FLOAT32 only (see Exp.cpp execute()).
+class Exp : public Kernel
+{
+public:
+ Exp(const Tensor *input, Tensor *output);
+
+ // Accessors for the single input / output tensor.
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXP_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp
new file mode 100644
index 000000000..a159d9db9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Compares the kernel against std::exp element by element.
+// NOTE(review): std::exp(100.0f) overflows float to +inf; both the reference
+// and the kernel compute in float, so the values should still agree — confirm
+// FloatArrayNear treats matching infinities as equal.
+TEST(ExpTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 1, 7};
+ std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Exp kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{1, 1, 7};
+ std::vector<float> ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f),
+ std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f),
+ std::exp(-0.01f)};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..ba35c99fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+#include <cstring>   // std::memcpy (used in ExpandDims::execute)
+#include <stdexcept> // std::runtime_error (used in ExpandDims::configure)
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Inputs: the tensor to reshape and a scalar axis tensor; one output.
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+ : Kernel({input, axis}, {output})
+{
+}
+
+void ExpandDims::configure()
+{
+ int32_t axis_value;
+
+ // The axis tensor must be S32 or S64; S64 is narrowed to 32 bits.
+ switch (axis()->element_type())
+ {
+ case loco::DataType::S32:
+ axis_value = *getTensorData<int32_t>(axis());
+ break;
+ case loco::DataType::S64:
+ axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ const auto input_shape = input()->shape();
+
+ // Negative axes count from the end of the *output* rank (num_dims + 1),
+ // so -1 means "append a trailing dimension".
+ if (axis_value < 0)
+ {
+ axis_value += input_shape.num_dims() + 1;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
+
+ // Build the output shape: copy input dims, inserting a 1 at axis_value.
+ Shape output_shape(input_shape.num_dims() + 1);
+ for (int32_t i = 0; i < output_shape.num_dims(); ++i)
+ {
+ if (i < axis_value)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ else if (i == axis_value)
+ {
+ output_shape.dim(i) = 1;
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(i >= 1);
+ output_shape.dim(i) = input_shape.dim(i - 1);
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void ExpandDims::execute() const
+{
+ // Just copy input to output
+ // (only the rank changes; the flat element data is identical).
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644
index 000000000..e510b1160
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Inserts a size-1 dimension at the position given by the scalar axis
+// tensor (S32/S64, negative values allowed — see ExpandDims.cpp).
+class ExpandDims : public Kernel
+{
+public:
+ ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..df9eaccc0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// A non-negative axis inserts the new unit dimension counting from the
+// front: shape {2, 2} with axis 0 becomes {1, 2, 2}; data is unchanged.
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+// A negative axis counts from the back: shape {2, 2} with axis -1 becomes
+// {2, 2, 1}.
+TEST_F(ExpandDimsTest, NegAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {-1};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+// A FLOAT32 axis tensor must be rejected at configure() time — axis is
+// expected to be of integral type.
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<float> axis_value = {1.0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// An axis value too large for the input rank (3 for a rank-2 input) must be
+// rejected at configure() time.
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {3};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..e09d6331a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// dims: 1-D tensor holding the output shape; value: scalar to replicate.
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+  : Kernel({dims, value}, {output})
+{
+}
+
+// Reads the 1-D "dims" tensor (T is int32_t or int64_t, matching its dtype)
+// and resizes the output to that shape. Throws if any requested dimension
+// is negative.
+template <typename T> void Fill::configureShape()
+{
+  const auto dims_data = getTensorData<T>(dims());
+  Shape output_shape(dims()->shape().dim(0));
+
+  for (int i = 0; i < output_shape.num_dims(); ++i)
+  {
+    T data = dims_data[i];
+    if (data < 0)
+      throw std::runtime_error("Fill dimensions must be >= 0");
+
+    output_shape.dim(i) = data;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Validates the inputs and resizes the output:
+//  - dims  : must be 1-D, of type S32 or S64
+//  - value : must be a scalar; for quantized S8/S16 values its scale and
+//            zero-point must match the output (S16 additionally requires a
+//            zero-point of 0)
+void Fill::configure()
+{
+  const auto dims_shape = dims()->shape();
+  const auto value_shape = value()->shape();
+
+  // Make sure the 1st input tensor is 1-D
+  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+  // Make sure the 1st input tensor is int32 or int64
+  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+                         dims()->element_type() == DataType::S64);
+
+  // Make sure the 2nd input tensor is a scalar
+  // (fixed: terminate the macro invocation with ';' like every other use in
+  // this file instead of relying on the macro expanding to a full statement)
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+  // Check zero point and scale for S16 and S8
+  if (value()->element_type() == loco::DataType::S16 or
+      value()->element_type() == loco::DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+    if (value()->element_type() == loco::DataType::S16)
+      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+  }
+  // Resize output
+  switch (dims()->element_type())
+  {
+    case DataType::S32:
+      configureShape<int32_t>();
+      break;
+    case DataType::S64:
+      configureShape<int64_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Replicates the scalar "value" into every element of the output via the
+// TFLite reference Fill implementation, dispatched on the output dtype.
+void Fill::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::S8:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+                                  getTensorShape(output()), getTensorData<int8_t>(output()));
+      break;
+    case DataType::S16:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+                                  getTensorShape(output()), getTensorData<int16_t>(output()));
+      break;
+    case DataType::S32:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+                                  getTensorShape(output()), getTensorData<int32_t>(output()));
+      break;
+    case DataType::S64:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+                                  getTensorShape(output()), getTensorData<int64_t>(output()));
+      break;
+    case DataType::FLOAT32:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h
new file mode 100644
index 000000000..184f0cb83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FILL_H
+#define LUCI_INTERPRETER_KERNELS_FILL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the Fill operation: produces a tensor whose shape is given by
+// the 1-D "dims" tensor and whose every element equals the scalar "value".
+class Fill : public Kernel
+{
+public:
+  Fill(const Tensor *dims, const Tensor *value, Tensor *output);
+
+  // Accessors over the base-class input/output tensor lists.
+  const Tensor *dims() const { return _inputs[0]; }
+  const Tensor *value() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Resizes the output from "dims" data; T matches the dims dtype (S32/S64).
+  template <typename T> void configureShape();
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FILL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..cf56df507
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class FillTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Helper for plain integer dtypes: fills a {2, 3} output with the scalar 5
+// and checks both data and shape. T is the C++ element type for DataType DT.
+template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager)
+{
+  Shape dims_shape{2};
+
+  std::vector<int32_t> dims_data = {2, 3};
+  std::vector<T> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+  Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager);
+
+  Tensor output_tensor = makeOutputTensor(DT);
+
+  Fill kernel(&dims, &value, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5};
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Helper for quantized dtypes (S8/S16): value and output share scale 0.25;
+// S8 uses a non-zero zero-point to exercise the zero-point checks.
+template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager)
+{
+  Shape dims_shape{2};
+
+  std::vector<int32_t> dims_data = {2, 3};
+  std::vector<float> value_data = {5};
+
+  int32_t zero_point = 0;
+
+  if (DT == loco::DataType::S8)
+    zero_point = 1;
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+  Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point,
+                                     value_data, memory_manager);
+
+  Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point);
+
+  Fill kernel(&dims, &value, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Covers every supported integer value dtype via the helpers above.
+TEST_F(FillTest, FillInt)
+{
+  // Run for int32_t input
+  runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get());
+  // Run for int64_t input
+  runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get());
+  // Run for int8_t input
+  runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get());
+  // Run for int16_t input
+  runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get());
+
+  SUCCEED();
+}
+
+// Float fill with S64 dims: {2, 2, 2} output, every element 5.
+TEST_F(FillTest, FillFloat)
+{
+  Shape dims_shape{3};
+
+  std::vector<int64_t> dims_data = {2, 2, 2};
+  std::vector<float> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get());
+  Tensor value =
+    makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Fill kernel(&dims, &value, &output_tensor);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5};
+
+  std::vector<int32_t> ref_output_shape{2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data);
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// A 2-D "dims" tensor violates the 1-D requirement and must throw in
+// configure().
+TEST_F(FillTest, Invalid_Input_Shape_NEG)
+{
+  Shape dims_shape{1, 3};
+
+  std::vector<int32_t> dims_data = {2, 2, 2};
+  std::vector<float> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+  Tensor value =
+    makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Fill kernel(&dims, &value, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// A rank-1 "value" tensor violates the scalar requirement and must throw in
+// configure().
+TEST_F(FillTest, Invalid_Value_Shape_NEG)
+{
+  Shape dims_shape{3};
+
+  std::vector<int32_t> dims_data = {2, 2, 2};
+  std::vector<float> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+  Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Fill kernel(&dims, &value, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
new file mode 100644
index 000000000..e3c4246cc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Element-wise floor kernel; just forwards the tensors to the base Kernel.
+Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Input and output dtypes must match; output takes the input's shape.
+void Floor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  output()->resize(input()->shape());
+}
+
+// Only FLOAT32 is supported; any other dtype throws.
+void Floor::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Delegates to the TFLite reference floor implementation.
+void Floor::evalFloat() const
+{
+  tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h
new file mode 100644
index 000000000..ca3ad5997
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the element-wise Floor operation (FLOAT32 only at execute).
+class Floor : public Kernel
+{
+public:
+  Floor(const Tensor *input, Tensor *output);
+
+  // Accessors over the base-class input/output tensor lists.
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp
new file mode 100644
index 000000000..30076fb54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class FloorTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Floor of mixed positive/negative floats; note -0.9 floors to -1 (towards
+// negative infinity, not truncation). Shape is preserved.
+TEST_F(FloorTest, SimpleFloat)
+{
+  std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
+  std::vector<float> input_data{
+    0.2, 8.6, 2.4, 4.3,  // Row 1
+    3, 7.1, 10.5, -0.9,  // Row 2
+  };
+
+  std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
+  std::vector<float> ref_output_data{
+    0, 8, 2, 4,   // Row 1
+    3, 7, 10, -1, // Row 2
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Floor kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Mismatched input/output dtypes must be rejected at configure() time.
+TEST_F(FloorTest, Input_Output_Type_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  Floor kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
new file mode 100644
index 000000000..a7a10a336
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// FloorDiv computes floor(x / y) element-wise with broadcasting.
+// Parameter names now match the declaration in FloorDiv.h (x, y) and the
+// x()/y() accessors; the previous "input"/"alpha" names were copy-paste
+// leftovers from another kernel.
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Both operands must share the output's dtype; the output shape is the
+// broadcast of the two operand shapes.
+void FloorDiv::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Only FLOAT32 is supported; any other dtype throws.
+void FloorDiv::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// floor(x / y) in double precision per element; rejects any zero in the
+// denominator tensor (full scan) before computing, then dispatches to the
+// broadcast or same-shape TFLite reference helper.
+void FloorDiv::evalFloat() const
+{
+  auto FloorDivFunc = [](float x, float y) -> float {
+    return std::floor(static_cast<double>(x) / static_cast<double>(y));
+  };
+
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+
+  // Check the denominator
+  for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+  {
+    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+  }
+
+  if (x()->shape() != y()->shape())
+  {
+    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+  else
+  {
+    tflite::reference_ops::BinaryFunction<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
new file mode 100644
index 000000000..e9c47d81a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the element-wise FloorDiv operation: floor(x / y) with
+// broadcasting (FLOAT32 only at execute; throws on zero denominators).
+class FloorDiv : public Kernel
+{
+public:
+  FloorDiv(const Tensor *x, const Tensor *y, Tensor *output);
+
+  // Accessors over the base-class input/output tensor lists.
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
new file mode 100644
index 000000000..3e1b5f18e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class FloorDivTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Same-shape operands; note floor division rounds towards negative
+// infinity (e.g. -1.9 / -1.0 -> 1, not 2 via truncation of 1.9... etc.).
+TEST_F(FloorDivTest, FloatSimple)
+{
+  Shape x_shape{2, 3};
+  std::vector<float> x_data{
+    0.5, 2.4, 3.1,   // Row 1
+    1.9, -1.9, -2.8, // Row 2
+  };
+
+  Shape y_shape = x_shape;
+  std::vector<float> y_data{
+    2.0, 0.5, 3.0,   // Row 1
+    1.0, -1.0, -2.0, // Row 2
+  };
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  std::vector<float> ref_output_data{
+    0, 4, 1, // Row 1
+    1, 1, 1, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// {1, 3} x broadcast against {3, 3} y produces a {3, 3} output.
+TEST_F(FloorDivTest, FloatBroadcast)
+{
+  Shape x_shape{1, 3};
+  std::vector<float> x_data{
+    0.5, 2.4, -3.1, // Row 1
+  };
+
+  Shape y_shape{3, 3};
+  std::vector<float> y_data{
+    1.0, 1.0, 1.0,   // Row 1
+    2.0, -0.5, -2.0, // Row 2
+    0.3, 0.7, 0.9,   // Row 3
+  };
+
+  std::vector<int32_t> ref_output_shape{3, 3};
+  std::vector<float> ref_output_data{
+    0, 2, -4, // Row 1
+    0, -5, 1, // Row 2
+    1, 3, -4, // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// A zero denominator is only detected at execute() time (evalFloat scans y
+// before computing), so configure() succeeds here and execute() throws.
+TEST_F(FloorDivTest, DivByZero_NEG)
+{
+  Shape shape{3};
+  std::vector<float> x_data{1, 0, -1};
+  std::vector<float> y_data{0, 0, 0};
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+// Output dtype differing from the operands is rejected at configure() time.
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Mismatched operand dtypes are rejected at configure() time.
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644
index 000000000..bd2bb2f35
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Fully-connected (dense) kernel. The bias tensor is optional (may be nullptr);
+// all validation happens in configure().
+FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
+                               Tensor *output, const FullyConnectedParams &params)
+  : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+{
+}
+
+// Validates dtype combinations and shapes, then resizes the output tensor.
+void FullyConnected::configure()
+{
+  // The weights dtype dictates which input/output/bias dtypes are legal.
+  if (weights()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+  }
+  else if (weights()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (weights()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  const Shape &input_shape = input()->shape();
+  const Shape &weights_shape = weights()->shape();
+
+  // Weights must be a 2-D [num_units, input_depth] matrix; an optional bias
+  // must provide exactly one element per output unit.
+  LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(bias() == nullptr ||
+                         bias()->shape().num_elements() == weights_shape.dim(0));
+
+  // The flattened input must split evenly into rows of input_depth elements.
+  LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
+  const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
+  const int32_t num_units = weights_shape.dim(0);
+
+  // NOTE A second, identical bias-size check used to live here; it duplicated
+  // the check right after the weight-rank check above and was removed.
+
+  if (params().keep_num_dims == false)
+  {
+    // Collapse all leading dimensions into a single batch dimension.
+    output()->resize({batch_size, num_units});
+  }
+  else
+  {
+    // Keep the input rank; only the innermost dimension becomes num_units.
+    luci_interpreter::Shape output_shape(input_shape.num_dims());
+    for (int i = 0; i < input_shape.num_dims(); ++i)
+      output_shape.dim(i) = input_shape.dim(i);
+    output_shape.dim(input_shape.num_dims() - 1) = num_units;
+    output()->resize(output_shape);
+  }
+}
+
+// Dispatches to the dtype-specific evaluation routine.
+void FullyConnected::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S8:
+      evalQuantizedS8();
+      break;
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference implementation.
+void FullyConnected::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::FullyConnectedParams params{};
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+  params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+  tflite::reference_ops::FullyConnected(
+    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+    getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Asymmetric uint8 path: derives the requantization multiplier/shift and the
+// zero-point offsets, then delegates to the TFLite reference implementation.
+void FullyConnected::evalQuantized() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  // Offsets are negated zero points, as expected by the reference kernel.
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  tflite::reference_ops::FullyConnected(
+    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
+    getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+// Symmetric int8 path: same parameter setup as evalQuantized(), but delegates
+// to the platform abstraction layer (PAL) implementation.
+void FullyConnected::evalQuantizedS8() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  luci_interpreter_pal::FullyConnected<int8_t>(
+    op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
+    getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<int8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
new file mode 100644
index 000000000..2a7c068c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Fully-connected (dense) layer kernel.
+// Inputs: input tensor, 2-D weights, optional bias (nullptr allowed).
+// configure() validates dtypes/shapes and resizes the output; execute()
+// dispatches to a float, uint8, or int8 evaluation path.
+class FullyConnected : public KernelWithParams<FullyConnectedParams>
+{
+public:
+  FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
+                 const FullyConnectedParams &params);
+
+  // Accessors for the tensors registered in the constructor, in order.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *weights() const { return _inputs[1]; }
+  const Tensor *bias() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One evaluation routine per supported element type.
+  void evalFloat() const;
+  void evalQuantized() const;
+  void evalQuantizedS8() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644
index 000000000..4474cc4fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Float path: runs FullyConnected with RELU activation and checks the output
+// data and shape exactly.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+           std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+// int8 specialization: quantizes the float fixtures (S8 input/weights, S32
+// bias) and compares the dequantized output within the quantization tolerance.
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+                   std::initializer_list<int32_t> weights_shape,
+                   std::initializer_list<int32_t> bias_shape,
+                   std::initializer_list<int32_t> output_shape,
+                   std::initializer_list<float> input_data,
+                   std::initializer_list<float> weights_data,
+                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  // Bias scale is input_scale * weights_scale (same scale used for both here).
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+// uint8 specialization: same scheme as int8 but with U8 input/weights.
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+// Typed fixture: runs the Simple test once per supported element type.
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+// Happy path: 2 batches x 6 inputs against a [3, 6] weight matrix.
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+  Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+                   {
+                     -3, -5, 5, 4, 9, -2,  // batch = 0
+                     -3, -2, -4, 9, -8, 1, // batch = 1
+                   },
+                   {
+                     -3, -7, 4, -4, -6, 4, // unit = 0
+                     3, 5, 2, 3, -3, -8,   // unit = 1
+                     -3, 7, 4, 9, 0, -5,   // unit = 2
+                   },
+                   {-1, -5, -8},
+                   {
+                     0, 0, 32,   // batch = 0
+                     22, 11, 47, // batch = 1
+                   });
+}
+
+// A float graph with an S32 bias must be rejected by configure().
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5, 4, 9, -2,  // batch = 0
+    -3, -2, -4, 9, -8, 1, // batch = 1
+  };
+  Shape weights_shape{3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4, // unit = 0
+    3, 5, 2, 3, -3, -8,   // unit = 1
+    -3, 7, 4, 9, 0, -5,   // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<int32_t> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Weights must be rank-2; a rank-3 weight tensor must be rejected.
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5, 4, 9, -2,  // batch = 0
+    -3, -2, -4, 9, -8, 1, // batch = 1
+  };
+  Shape weights_shape{1, 3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4, // unit = 0
+    3, 5, 2, 3, -3, -8,   // unit = 1
+    -3, 7, 4, 9, 0, -5,   // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Bias length (3) must match weights dim(0) (6 here) — must be rejected.
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5, 4, 9, -2,  // batch = 0
+    -3, -2, -4, 9, -8, 1, // batch = 1
+  };
+  Shape weights_shape{6, 3};
+  std::vector<float> weights_data{
+    -3, -7, 4, // unit = 0
+    -4, -6, 4, // unit = 1
+    3, 5, 2,   // unit = 2
+    3, -3, -8, // unit = 3
+    -3, 7, 4,  // unit = 4
+    9, 0, -5,  // unit = 5
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..f1256660f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Gather kernel: selects slices of `params` along `axis` using `indices`,
+// with optional leading batch dimensions (gparams.batch_dims).
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+               const GatherParams &gparams)
+  : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+// Validates dtypes, normalizes axis/batch_dims, and computes the output shape.
+void Gather::configure()
+{
+  // Only the FLOAT32 params path is implemented (see execute()).
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  // refer tensorflow/lite/kernels/gather.cc
+
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  // Negative axis counts from the end, as in TFLite.
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += params_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  int batch_dims = _params.batch_dims;
+  // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // Negative batch_dims is added with rank of positions.
+  if (batch_dims < 0)
+  {
+    batch_dims += indices_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  // Leading batch dimensions of params and indices must agree.
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  // Output shape: params dims before axis, then non-batch indices dims, then
+  // params dims after axis.
+  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+  Shape output_shape(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < axis; ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = indices_shape.dim(i);
+  }
+  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  output()->resize(output_shape);
+}
+
+// Dispatches on the params dtype; only FLOAT32 is supported.
+void Gather::execute() const
+{
+  switch (params()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: picks the S32 or S64 index instantiation of the PAL Gather.
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  const auto params_data = getTensorData<float>(params());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::GatherParams tparams;
+  tparams.axis = _params.axis;
+  tparams.batch_dims = _params.batch_dims;
+
+  if (indices()->element_type() == DataType::S32)
+  {
+    const auto indices_data = getTensorData<int32_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+  else
+  {
+    const auto indices_data = getTensorData<int64_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 000000000..cc02d64fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Gather kernel: selects slices of `params` along GatherParams::axis using
+// `indices` (S32 or S64), honoring GatherParams::batch_dims.
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+  Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+  // Accessors for the tensors registered in the constructor, in order.
+  const Tensor *params() const { return _inputs[0]; }
+  const Tensor *indices() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // FLOAT32 is the only supported params dtype (see Gather.cpp).
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..4b3dda708
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager per test.
+class GatherTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Gather along axis 1 of a [1, 6] tensor with flat S32 indices.
+TEST_F(GatherTest, Simple)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+// Batched gather: batch_dims = 1, per-row indices into a [3, 5] tensor.
+TEST_F(GatherTest, Simple_Batch)
+{
+  Shape params_shape = {3, 5};
+  Shape indices_shape = {3, 2};
+  std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+  std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+  std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+  Tensor indices_tensor =
+    makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+// Non-FLOAT32 params must be rejected by configure().
+TEST_F(GatherTest, Simple_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// An axis outside the params rank must be rejected by configure().
+TEST_F(GatherTest, Axis_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 100;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// batch_dims greater than axis must be rejected by configure().
+TEST_F(GatherTest, Batch_NEG)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 0;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644
index 000000000..5ccae3c38
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Binary elementwise Greater (x > y) producing a BOOL tensor.
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+// Validates operand/output dtypes, precomputes rescale multipliers for the
+// quantized (U8) path, and resizes the output to the broadcasted shape.
+void Greater::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Comparison of U8 operands is done on rescaled values; cache the
+    // fixed-point multiplier/shift for each input's scale here.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches to the evaluation routine matching the input element type.
+// configure() has already guaranteed that x and y share one element type.
+void Greater::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference comparison kernels, picking
+// the broadcasting variant when the operand shapes differ.
+void Greater::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
+                                                  getTensorShape(y()), y_data,
+                                                  getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                   y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path (S32/S64): uses the "NoScaling" reference kernels since
+// integer comparison needs no quantization rescale.
+template <typename T> void Greater::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                                           getTensorShape(y()), y_data,
+                                                           getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                            getTensorShape(y()), y_data, getTensorShape(output()),
+                                            output_data);
+  }
+}
+
+// Quantized U8 path: compares operands after rescaling each with the
+// fixed-point multiplier/shift cached in configure(), per the contract of
+// the tflite::reference_ops "WithScaling" comparison kernels.
+void Greater::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
new file mode 100644
index 000000000..065f76d7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing elementwise Greater (x > y) with numpy-style
+// broadcasting. Supports FLOAT32, S32, S64 and quantized U8 inputs; the
+// output is always BOOL.
+class Greater : public Kernel
+{
+public:
+  Greater(const Tensor *x, const Tensor *y, Tensor *output);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Fixed-point rescale parameters for the quantized (U8) path, computed
+  // once in configure() from each input's scale.
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
new file mode 100644
index 000000000..a48080124
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture: provides a fresh TestMemoryManager per test case.
+class GreaterTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Elementwise float comparison with identical operand shapes (no broadcast).
+TEST_F(GreaterTest, FloatSimple)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1, 0, 1,      // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, // Row 1
+    true, false, false, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// Float comparison where the y operand ({1, 3}) is broadcast across the
+// rows of x ({3, 3}).
+// NOTE: test name fixed from the misspelled "FloatBroardcast".
+TEST_F(GreaterTest, FloatBroadcast)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+    -1, 0, 1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, // Row 1
+    true, false, false, // Row 2
+    false, false, true, // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+// Shared driver: same-shape integer comparison exercising the min/max
+// limits of the element type (guards against accidental value truncation).
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Shared driver: integer comparison with y ({3}) broadcast over x ({4, 3}).
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2, 3,          // Row 1
+    4, 5, max_value,          // Row 2
+    -1, -4, -3,               // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, false, // Row 1
+    true, true, true,   // Row 2
+    true, false, false, // Row 3
+    false, false, true, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Instantiations of the shared drivers for each supported integer type.
+TEST_F(GreaterTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(GreaterTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// U8 comparison where both operands share the same quantization parameters.
+TEST_F(GreaterTest, Uint8Quantized)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6, 0.6, 0.5, // Row 1
+    -1, 0.05, 0, 1,     // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, true, // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison where x and y use different quantization parameters, so the
+// kernel must rescale both operands before comparing.
+TEST_F(GreaterTest, Uint8QuantizedRescale)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6, 0.6, 0.5, // Row 1
+    -1, 0.05, 0, 1,     // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, true, // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
+
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison with y ({1, 1, 4, 1}) broadcast over x ({1, 3, 4, 1}).
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
+{
+  std::vector<float> x_data{
+    0.4, -0.8, 0.7, 0.3, // Row 1
+    -0.5, 0.1, 0, 0.5,   // Row 2
+    1, 0, 0.05, -1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true, false, // Row 1
+    true, true, false, false, // Row 2
+    true, false, true, false, // Row 3
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// configure() must reject operands of different element types.
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// configure() must reject a non-BOOL output tensor.
+TEST_F(GreaterTest, Input_Output_Type_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// {2} vs {3} is not broadcastable; configure() must throw (float path).
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S32 path.
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S64 path.
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644
index 000000000..27e42c971
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Binary elementwise GreaterEqual (x >= y) producing a BOOL tensor.
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Validates operand/output dtypes, precomputes rescale multipliers for the
+// quantized (U8) path, and resizes the output to the broadcasted shape.
+void GreaterEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Cache fixed-point multiplier/shift derived from each input's scale
+    // for the quantized comparison path.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches to the evaluation routine matching the input element type.
+void GreaterEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference comparison kernels, picking
+// the broadcasting variant when the operand shapes differ.
+void GreaterEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
+                                                       getTensorShape(y()), y_data,
+                                                       getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                        y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path (S32/S64): uses the "NoScaling" reference kernels since
+// integer comparison needs no quantization rescale.
+template <typename T> void GreaterEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                 getTensorShape(y()), y_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+// Quantized U8 path: compares operands after rescaling each with the
+// fixed-point multiplier/shift cached in configure(), per the contract of
+// the tflite::reference_ops "WithScaling" comparison kernels.
+void GreaterEqual::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
new file mode 100644
index 000000000..e333c30a6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing elementwise GreaterEqual (x >= y) with numpy-style
+// broadcasting. Supports FLOAT32, S32, S64 and quantized U8 inputs; the
+// output is always BOOL.
+class GreaterEqual : public Kernel
+{
+public:
+  GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Fixed-point rescale parameters for the quantized (U8) path, computed
+  // once in configure() from each input's scale.
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
new file mode 100644
index 000000000..35bf88eab
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture: provides a fresh TestMemoryManager per test case.
+class GreaterEqualTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Elementwise float comparison with identical operand shapes (no broadcast).
+TEST_F(GreaterEqualTest, FloatSimple)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1, 0, 1,      // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true,  // Row 1
+    true, true, false,  // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// Float comparison where the y operand ({1, 3}) is broadcast across the
+// rows of x ({3, 3}).
+// NOTE: test name fixed from the misspelled "FloatBroardcast".
+TEST_F(GreaterEqualTest, FloatBroadcast)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+    -1, 0, 1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true,   // Row 1
+    true, false, false,  // Row 2
+    false, false, true,  // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+// Shared driver: same-shape integer comparison exercising the min/max
+// limits of the element type (guards against accidental value truncation).
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, true};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Shared driver: integer comparison with y ({3}) broadcast over x ({4, 3}).
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2, 3,              // Row 1
+    4, 5, max_value,              // Row 2
+    -1, -4, -3,                   // Row 3
+    min_value, -2, max_value - 1, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, false, // Row 1
+    true, true, true,   // Row 2
+    true, false, false, // Row 3
+    false, true, true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Instantiations of the shared drivers for each supported integer type.
+TEST_F(GreaterEqualTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(GreaterEqualTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// U8 comparison where both operands share the same quantization parameters.
+TEST_F(GreaterEqualTest, Uint8Quantized)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6, 0.55, 0.5, // Row 1
+    -1, 0.05, 0, 1,      // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true, true,  // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison where x and y use different quantization parameters, so the
+// kernel must rescale both operands before comparing.
+TEST_F(GreaterEqualTest, Uint8QuantizedRescale)
+{
+  std::vector<float> x_data{
+    0.5, 0.5, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.5, 0.6, 0.5, // Row 1
+    -1, 0.05, 0, 1,     // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true, true,  // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison with y ({1, 1, 4, 1}) broadcast over x ({1, 3, 4, 1}).
+TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast)
+{
+  std::vector<float> x_data{
+    0.4, -0.8, 0.7, 0.3, // Row 1
+    -0.5, 0.1, 0, 0.5,   // Row 2
+    1, 0, 0.05, -1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true, false, // Row 1
+    true, true, true, false,  // Row 2
+    true, false, true, false, // Row 3
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// configure() must reject operands of different element types.
+TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// configure() must reject a non-BOOL output tensor.
+TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// {2} vs {3} is not broadcastable; configure() must throw (float path).
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S32 path.
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S64 path.
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp
new file mode 100644
index 000000000..971708bca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/If.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static std::vector<const Tensor *> joinInputs(const Tensor *cond,
+ const std::vector<const Tensor *> &inputs)
+{
+ std::vector<const Tensor *> result{cond};
+ result.insert(result.cend(), inputs.cbegin(), inputs.cend());
+ return result;
+}
+
+If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
+{
+}
+
+void If::configure()
+{
+ LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1);
+
+ for (RuntimeGraph *graph : {_then_graph, _else_graph})
+ {
+ (void)graph;
+ LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1);
+ LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size());
+ }
+}
+
+void If::execute() const
+{
+ const bool cond_value = cond()->data<bool>()[0];
+
+ RuntimeGraph *active_graph = cond_value ? _then_graph : _else_graph;
+ const auto &graph_inputs = active_graph->getInputTensors();
+ const auto &graph_outputs = active_graph->getOutputTensors();
+
+ // Copy kernel inputs to active graph inputs.
+ for (size_t i = 0; i < getInputTensors().size() - 1; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type());
+ graph_inputs[i]->resize(input(i)->shape());
+
+ const int32_t num_elements = input(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+ // TODO: Think about how to allocate memory for the input in the main graph
+ active_graph->configureAllocations(graph_inputs[i]);
+ std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
+ }
+
+ active_graph->execute();
+
+ // Copy graph outputs to kernel outputs.
+ for (size_t i = 0; i < getOutputTensors().size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
+ output(i)->resize(graph_outputs[i]->shape());
+ // TODO: Think about how to allocate memory for the output in the main graph
+ active_graph->configureAllocations(output(i));
+
+ const int32_t num_elements = output(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(output(i)->element_type());
+ std::memcpy(output(i)->data<void>(), graph_outputs[i]->data<void>(),
+ num_elements * element_size);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.h b/compiler/luci-micro/luci-interpreter/src/kernels/If.h
new file mode 100644
index 000000000..fa6ab371a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_IF_H
+#define LUCI_INTERPRETER_KERNELS_IF_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class If : public Kernel
+{
+public:
+ If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph);
+
+ const Tensor *cond() const { return _inputs[0]; }
+ const Tensor *input(int index) const { return _inputs[1 + index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _then_graph;
+ RuntimeGraph *const _else_graph;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_IF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp
new file mode 100644
index 000000000..c5f4faf75
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/If.h"
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class IfTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input1, input2, output, params));
+
+ return graph;
+}
+
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Mul>(input1, input2, output, params));
+
+ return graph;
+}
+
+TEST_F(IfTest, CondTrue)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
+}
+
+TEST_F(IfTest, CondFalse)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
+}
+
+TEST_F(IfTest, InvalidCondType_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(IfTest, InvalidCondElementNum_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
new file mode 100644
index 000000000..22a329be6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+ Tensor *output, const InstanceNormParams &params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+void InstanceNorm::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+ gamma()->shape().dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+ beta()->shape().dim(0) == 1);
+ output()->resize(input()->shape());
+}
+
+void InstanceNorm::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void InstanceNorm::evalFloat() const
+{
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ auto input_shape = getTensorShape(input());
+ auto output_shape = getTensorShape(output());
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+ const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+ const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+ const float *input_data = getTensorData<float>(input());
+ const float *gamma_data = getTensorData<float>(gamma());
+ auto gamma_shape = getTensorShape(gamma());
+ bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+ const float *beta_data = getTensorData<float>(beta());
+ auto beta_shape = getTensorShape(beta());
+ bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+ float *output_data = getTensorData<float>(output());
+ for (int32_t batch = 0; batch < batches; batch++)
+ {
+ for (int32_t channel = 0; channel < channels; channel++)
+ {
+ double sum = 0.0f;
+ double square_sum = 0.0f;
+ int32_t size = heights * widths;
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ sum += input_val;
+ square_sum += (input_val * input_val);
+ }
+ }
+ double mean = sum / size;
+ double var = square_sum / size - mean * mean;
+
+ double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+ double beta = single_beta ? beta_data[0] : beta_data[channel];
+ double a = gamma / (std::sqrt(var + params().epsilon));
+ double b = -mean * a + beta;
+
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_value =
+ input_data[tflite::Offset(output_shape, batch, height, width, channel)];
+ double output_value = input_value * a + b;
+ output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h
new file mode 100644
index 000000000..a70a84e0a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class InstanceNorm : public KernelWithParams<InstanceNormParams>
+{
+public:
+ InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output,
+ const InstanceNormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *gamma() const { return _inputs[1]; }
+ const Tensor *beta() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
new file mode 100644
index 000000000..04400c3c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/InstanceNorm.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+TEST_F(InstanceNormTest, Single_gamma_beta)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
+}
+
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
new file mode 100644
index 000000000..64222953f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include "PALL2Normalize.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+void L2Normalize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (output()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
+ }
+ LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
+ output()->resize(input()->shape());
+}
+
+void L2Normalize::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>(0);
+ break;
+ case DataType::U8:
+ eval<uint8_t>(input()->zero_point());
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+ tflite::L2NormalizationParams op_params{};
+ op_params.input_zero_point = zero_point;
+ luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+ getTensorData<T>(input()), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h
new file mode 100644
index 000000000..6c7dac698
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Normalize : public KernelWithParams<L2NormParams>
+{
+public:
+ L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval(int32_t zero_point) const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
new file mode 100644
index 000000000..6f960e8b4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/L2Normalize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class L2NormalizeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
+
+TYPED_TEST(L2NormalizeTest, Simple)
+{
+ Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1},
+ {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05});
+}
+
+TEST(L2NormalizeTest, ActivationType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::RELU6;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
new file mode 100644
index 000000000..5a88808d5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALL2Pool2D.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void L2Pool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int batches = input()->shape().dim(0);
+ int height = input()->shape().dim(1);
+ int width = input()->shape().dim(2);
+ int channels_out = input()->shape().dim(3);
+
+ // Matching GetWindowedOutputSize in TensorFlow.
+ auto padding = params().padding;
+ int out_width, out_height;
+ out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
+ out_height =
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+ _padding_width =
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ _padding_height =
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+ output()->resize({batches, out_height, out_width, channels_out});
+}
+
+void L2Pool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ tflite::PoolParams op_params;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ op_params.filter_height = params().filter_height;
+ op_params.filter_width = params().filter_width;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.float_activation_min = activation_min;
+ op_params.float_activation_max = activation_max;
+ luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h
new file mode 100644
index 000000000..d40f5f478
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Pool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _padding_height = 0;
+ int32_t _padding_width = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
new file mode 100644
index 000000000..7245456cb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.53553, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu1)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU_N1_TO_1;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 1.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu6)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU6;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 6.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSame)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
+ // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
+{
+  Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
new file mode 100644
index 000000000..3833a55e8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
+ : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+{
+}
+
+void LeakyRelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ output()->resize(input()->shape());
+}
+
+void LeakyRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LeakyRelu::evalFloat() const
+{
+ tflite::LeakyReluParams op_params{};
+ op_params.alpha = params().alpha;
+ luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LeakyRelu::evalQuantized() const
+{
+ tflite::LeakyReluParams op_params{};
+ op_params.input_offset = input()->zero_point();
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier_alpha = _output_multiplier_alpha;
+ op_params.output_shift_alpha = _output_shift_alpha;
+ op_params.output_multiplier_identity = _output_multiplier_identity;
+ op_params.output_shift_identity = _output_shift_identity;
+
+ tflite::reference_ops::QuantizeLeakyRelu(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h
new file mode 100644
index 000000000..e66f404df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+#define LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LeakyRelu : public KernelWithParams<LeakyReluParams>
+{
+public:
+ LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier_alpha = 0;
+ int _output_shift_alpha = 0;
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
new file mode 100644
index 000000000..0f6263b57
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data,
+ float alpha)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data, float alpha)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class LeakReluTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LeakReluTest, DataTypes);
+
+TYPED_TEST(LeakReluTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
+ },
+ /*alpha=*/0.5f);
+
+ SUCCEED();
+}
+
+TEST(LeakReluTest, InvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LeakyReluParams params{};
+ params.alpha = 0.5f;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
new file mode 100644
index 000000000..8d26ff297
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Less::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Less::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Less::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Less::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Less::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.h b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h
new file mode 100644
index 000000000..e27bb689c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_H
+#define LUCI_INTERPRETER_KERNELS_LESS_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Less : public Kernel
+{
+public:
+ Less(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp
new file mode 100644
index 000000000..8c5963363
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessTest, FloatBroardcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, false, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
new file mode 100644
index 000000000..b474bc47a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void LessEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void LessEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LessEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void LessEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void LessEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h
new file mode 100644
index 000000000..f82ea90d4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LessEqual : public Kernel
+{
+public:
+ LessEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
new file mode 100644
index 000000000..b2e2fa7a1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessEqualTest, FloatBroardcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, true, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, true, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..a2bf442b0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include "PALLocalResponseNormalization.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LocalResponseNormalization::LocalResponseNormalization(
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
+void LocalResponseNormalization::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LocalResponseNormalization::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::LocalResponseNormalizationParams op_params;
+ op_params.range = params().radius;
+ op_params.bias = params().bias;
+ op_params.alpha = params().alpha;
+ op_params.beta = params().beta;
+ luci_interpreter_pal::LocalResponseNormalization(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
new file mode 100644
index 000000000..60408a104
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams>
+{
+public:
+ LocalResponseNormalization(const Tensor *input, Tensor *output,
+ const LocalResponseNormalizationParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..4a9d4739f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LocalResponseNormalizationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithBias)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
+}
+
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 2;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
new file mode 100644
index 000000000..79c315338
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogSoftmax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+ tflite::SoftmaxParams params{};
+
+ params.table = _table;
+ params.beta = 1.0;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+ }
+ output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LogSoftmax::evalFloat() const
+{
+ tflite::SoftmaxParams params{};
+ tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
+ const auto input_shape = getTensorShape(input());
+ const auto output_shape = getTensorShape(output());
+ const auto input_scale = input()->scale();
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ const float beta = 1.0;
+
+ tflite::SoftmaxParams params{};
+
+ params.table = const_cast<float *>(_table);
+ params.zero_point = output()->zero_point();
+ params.scale = output()->scale();
+
+ luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+ luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h
new file mode 100644
index 000000000..18477fbe3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogSoftmax : public Kernel
+{
+public:
+ LogSoftmax(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+ float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
new file mode 100644
index 000000000..50dcd5c28
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
+{
+ Shape input_shape{2, 4};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(LogSoftmaxTest, Uint8)
+{
+ float kMin = -10;
+ float kMax = 10;
+ float kLogSoftmaxQuantizedTolerance = 16. / 256;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
+}
+
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
new file mode 100644
index 000000000..8e7263231
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalAnd::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalAnd::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalAnd();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalAnd::evalLogicalAnd() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x && y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h
new file mode 100644
index 000000000..46b889986
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalAnd : public Kernel
+{
+public:
+ LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalAnd() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
new file mode 100644
index 000000000..21b7951e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalAndTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalAndTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, Broadcast)
+{
+ Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalAndTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
new file mode 100644
index 000000000..65ab961aa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogicalNot::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LogicalNot::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalNot();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalNot::evalLogicalNot() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ bool *output_data = getTensorData<bool>(output());
+ const bool *input_data = getTensorData<bool>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = !input_data[i];
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h
new file mode 100644
index 000000000..1608fafa5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalNot : public Kernel
+{
+public:
+ LogicalNot(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalNot() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
new file mode 100644
index 000000000..3cbf27f6b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(false, true, true, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
new file mode 100644
index 000000000..f289ca64f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+
+#include "kernels/Utils.h"
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalOr::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalOr::execute() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x || y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h
new file mode 100644
index 000000000..88606483f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalOr : public Kernel
+{
+public:
+ LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
new file mode 100644
index 000000000..d65a69a5e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalOrTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalOrTest, Basic)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false},
+ _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, true, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, Broadcast)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalOrTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
new file mode 100644
index 000000000..58e4f185d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Logistic::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Logistic::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Logistic::evalFloat() const
+{
+ tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Logistic::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Logistic::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
new file mode 100644
index 000000000..31de6adf0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Logistic : public Kernel
+{
+public:
+ Logistic(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+ void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+ uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644
index 000000000..5a1ea669c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T>());
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale() * 2));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class LogisticTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+TYPED_TEST(LogisticTest, Simple)
+{
+ Check<TypeParam>(
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+}
+
+TEST(LogisticTest, IvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape = {1};
+ std::vector<float> input_data{10};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(LogisticTest, IvalidQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape = {2};
+ std::vector<float> input_data{-10, 10};
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644
index 000000000..8d9760ff2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void MaxPool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ assert(input()->shape().num_dims() == 4);
+ const Shape &input_shape = input()->shape();
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, depth});
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+}
+
+void MaxPool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void MaxPool2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void MaxPool2D::evalQuantized() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void MaxPool2D::evalSInt16() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::MaxPool(
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
new file mode 100644
index 000000000..bb7666305
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MaxPool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalSInt16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644
index 000000000..44f2a222f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, Uint8)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 6.0};
+ std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644
index 000000000..b102b5e27
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Maximum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Maximum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMaximum<float>();
+ break;
+ case DataType::U8:
+ evalMaximum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Maximum::evalMaximum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
new file mode 100644
index 000000000..3c99e69c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel computing the element-wise maximum of two tensors with numpy-style
// broadcasting. Supported element types at execute() time: FLOAT32 and U8.
class Maximum : public Kernel
{
public:
  Maximum(const Tensor *input1, const Tensor *input2, Tensor *output);

  // Accessors for the tensors registered in the constructor.
  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Typed evaluation shared by all supported element types.
  template <typename T> inline void evalMaximum() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
new file mode 100644
index 000000000..e4a505b03
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Test fixture: provides a fresh TestMemoryManager per test so each test owns
// its tensor allocations independently.
class MaximumTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
// Element-wise max of two equally shaped float tensors (no actual broadcasting).
TEST_F(MaximumTest, Float)
{
  Shape input_shape{3, 1, 2};
  std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
  std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
  Tensor input_tensor1 =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
  Tensor input_tensor2 =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  // Expected: pairwise max of input_data1/input_data2.
  std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
+
+TEST_F(MaximumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644
index 000000000..8e65e0d6d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+ params->axis_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ params->axis[i] = static_cast<int16>(axes_data[i]);
+ }
+ for (int i = num_axes; i < 4; ++i)
+ {
+ params->axis[i] = 1;
+ }
+}
+
// Returns the number of axes that will be reduced, counting each distinct axis
// once (negative axes are normalized before the duplicate check).
static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
  const auto normalize = [input_num_dims](int32_t axis) {
    return axis >= 0 ? axis : axis + input_num_dims;
  };

  int reduction_count = num_axes;
  for (int i = 0; i < num_axes; ++i)
  {
    const int current = normalize(axes_data[i]);
    assert(current >= 0 && current < input_num_dims);
    bool is_duplicate = false;
    for (int j = 0; j < i && !is_duplicate; ++j)
      is_duplicate = (normalize(axes_data[j]) == current);
    if (is_duplicate)
      --reduction_count;
  }
  return reduction_count;
}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
// Mean reduction kernel. Besides the real output, three scratch tensors
// (temp_index, resolved_axes, temp_sum) are registered as outputs so the
// runtime can allocate working memory for the generic reduction path.
Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
                                    params)
{
}
+
+void Mean::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ assert(num_axes <= 4);
+
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+ _need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+ if (_need_temporaries)
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+ temp_sum->resize(output()->shape());
+ }
+ else
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->set_allocatable(false);
+ resolved_axes->set_allocatable(false);
+ temp_sum->set_allocatable(false);
+ }
+}
+
+void Mean::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
// Float Mean: uses the fast tflite 4D (height/width) reference kernel when the
// reduction matches that special case, otherwise the generic reducer that
// works through the temp_index/resolved_axes/temp_sum scratch tensors.
void Mean::evalFloat() const
{
  const Shape &input_shape = input()->shape();
  int input_num_dims = input_shape.num_dims();
  const auto *axes_data = getTensorData<int32_t>(axes());
  int num_axes = axes()->shape().num_elements();

  tflite::MeanParams params{};
  resolveAxes(axes_data, num_axes, &params);

  // Scratch tensors registered as extra outputs (see constructor).
  auto temp_index = getOutputTensors()[1];
  auto resolved_axes = getOutputTensors()[2];
  auto temp_sum = getOutputTensors()[3];

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
      ((params.axis[0] == 1 && params.axis[1] == 2) ||
       (params.axis[0] == 2 && params.axis[1] == 1)))
  {
    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
                                getTensorShape(output()), getTensorData<float>(output()));
  }
  else
  {
    // Generic N-D reduction; sums are accumulated in temp_sum before averaging.
    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
                                input()->shape().num_dims(), getTensorData<float>(output()),
                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
                                axes_data, num_axes, _params.keep_dims,
                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
                                getTensorData<float>(temp_sum));
  }
}
+
// U8 Mean with three paths: the specialized 4D kernel, a requantization-free
// generic path when input and output share scale/zero-point, and the fully
// general QuantizedMeanOrSum otherwise.
void Mean::evalQuantized() const
{
  const Shape &input_shape = input()->shape();
  int input_num_dims = input_shape.num_dims();
  const auto *axes_data = getTensorData<int32_t>(axes());
  int num_axes = axes()->shape().num_elements();

  tflite::MeanParams params{};
  resolveAxes(axes_data, num_axes, &params);

  // Scratch tensors registered as extra outputs (see constructor).
  auto temp_index = getOutputTensors()[1];
  auto resolved_axes = getOutputTensors()[2];
  auto temp_sum = getOutputTensors()[3];

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
      ((params.axis[0] == 1 && params.axis[1] == 2) ||
       (params.axis[0] == 2 && params.axis[1] == 1)))
  {
    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
                                input()->zero_point(), input()->scale(), getTensorShape(output()),
                                getTensorData<uint8_t>(output()), output()->zero_point(),
                                output()->scale());
  }
  else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
  {
    // Same quantization on both ends: averaging raw values is exact.
    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
                                axes_data, num_axes, _params.keep_dims,
                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
                                getTensorData<int>(temp_sum));
  }
  else
  {
    // Differing quantization: dequantize-accumulate-requantize path.
    // NOTE(review): temp_sum is read here as int; callers declare its dtype
    // differently in tests — verify the allocated byte size matches int usage.
    tflite::reference_ops::QuantizedMeanOrSum<>(
      getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
      getTensorShape(input()).DimsData(), input()->shape().num_dims(),
      getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
      getTensorData<int>(temp_sum),
      /*compute_sum=*/false);
  }
}
+
// S16 Mean: only the 4D, keep_dims, axes {1,2} (H/W) configuration is
// implemented. Accumulates in int32, rescales by input_scale/output_scale via
// a fixed-point multiplier, divides by the element count with
// round-half-away-from-zero, and clamps to the symmetric int16 range.
void Mean::evalQuantizedS16() const
{
  const auto *input_data = getTensorData<int16_t>(input());
  auto *output_data = getTensorData<int16_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &output_shape = output()->shape();

  const auto *axes_data = getTensorData<int32_t>(axes());
  const int num_axes = axes()->shape().num_elements();

  // Symmetric clamp range: -32767..32767 (note: min is -max, not -32768).
  constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
      ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
  {
    const int32_t batches = input_shape.dim(0);
    const int32_t input_height = input_shape.dim(1);
    const int32_t input_width = input_shape.dim(2);
    const int32_t depth = input_shape.dim(3);
    assert(output_shape.num_dims() == 4);
    assert(output_shape.dim(0) == batches);
    assert(output_shape.dim(1) == 1);
    assert(output_shape.dim(2) == 1);
    assert(output_shape.dim(3) == depth);

    // Combined rescale factor between input and output quantization.
    const double real_multiplier =
      static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());

    int32_t output_multiplier{};
    int output_shift{};
    quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

    const int32_t num_elements_in_axes = input_height * input_width;

    for (int32_t batch = 0; batch < batches; ++batch)
    {
      for (int32_t c = 0; c < depth; ++c)
      {
        // Sum the H*W window for this (batch, channel) pair.
        int32_t acc = 0;
        for (int32_t in_y = 0; in_y < input_height; ++in_y)
        {
          for (int32_t in_x = 0; in_x < input_width; ++in_x)
          {
            acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
          }
        }
        // Rescale the raw sum into output quantization units.
        int32_t scaled_acc =
          tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
        // Divide by the number of elements rounding to the nearest integer.
        scaled_acc = scaled_acc > 0
                       ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
                       : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;

        scaled_acc = std::max(scaled_acc, output_min);
        scaled_acc = std::min(scaled_acc, output_max);

        output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
      }
    }
  }
  else
  {
    throw std::runtime_error("Unsupported configuration.");
  }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h
new file mode 100644
index 000000000..ed07ae561
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H
+#define LUCI_INTERPRETER_KERNELS_MEAN_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Mean reduction kernel over the axes given by the `axes` tensor, with
// optional keep_dims (see ReducerParams). temp_index/resolved_axes/temp_sum
// are scratch tensors exposed as extra outputs so the runtime allocates them.
class Mean : public KernelWithParams<ReducerParams>
{
public:
  Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
       Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axes() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Per-element-type evaluation paths.
  void evalFloat() const;
  void evalQuantized() const;
  void evalQuantizedS16() const;

private:
  // Set by configure(): true when the generic reducer (and its scratch
  // tensors) is required instead of the specialized 4D kernel.
  bool _need_temporaries = false;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MEAN_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp
new file mode 100644
index 000000000..d2c00935a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Test fixture: provides a fresh TestMemoryManager per test so each test owns
// its tensor allocations independently.
class MeanTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
// Generic float path: reduce axes {0, 2} of a {4, 3, 2} tensor with keep_dims,
// so the result keeps rank 3 as {1, 3, 1}.
TEST_F(MeanTest, FloatKeepDims)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};

  std::vector<int32_t> axis_data{0, 2};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
  // Scratch tensors are resized by configure(); start them with empty shapes.
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{10.5, 12.5, 14.5};
  std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// Specialized float path: 4D input, axes {1, 2}, keep_dims — exercises the
// fast H/W tflite kernel rather than the generic reducer.
TEST_F(MeanTest, FloatKeepDims4DMean)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};

  std::vector<int32_t> axis_data{1, 2};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{6, 7, 18, 19};
  std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// Generic float path without keep_dims; the axis list deliberately contains
// duplicates and negative values ({1, 0, -3, -3}: -3 aliases axis 0 on a
// rank-3 tensor), so only axes 0 and 1 are actually reduced.
TEST_F(MeanTest, FloatNotKeepDims)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};

  std::vector<int32_t> axis_data{1, 0, -3, -3};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  ReducerParams params{};
  params.keep_dims = false;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{12, 13};
  std::initializer_list<int32_t> ref_output_shape{2};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// U8 path with identical input/output quantization, so the kernel takes the
// requantization-free generic branch.
TEST_F(MeanTest, Uint8KeepDims)
{
  float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
  std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);

  std::vector<int32_t> axis_data{1};
  Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
                                                      input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  // NOTE(review): the kernel reads temp_sum as int while it is declared U8
  // here (the other U8 test uses FLOAT32) — verify the allocated size suffices.
  Tensor temp_sum(DataType::U8, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{0.3, 0.35, 0.55};
  std::initializer_list<int32_t> ref_output_shape{3, 1};
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// U8 path without keep_dims; same input/output quantization, rank drops from
// {1, 3, 2} to {1, 2} after reducing axis 1.
TEST_F(MeanTest, Uint8NotKeepDims)
{
  float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
  std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);

  std::vector<int32_t> axis_data{1};
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  ReducerParams params{};
  params.keep_dims = false;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{0.4, 0.4};
  std::initializer_list<int32_t> ref_output_shape{1, 2};
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// S16 path: only the 4D keep_dims axes-{1,2} configuration is implemented.
// Input scale 0.25, output scale 0.2, both with zero point 0 (the S16 kernel
// requires symmetric quantization).
TEST_F(MeanTest, SInt16KeepDims4D)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
  std::vector<int32_t> axes_data{1, 2};
  std::vector<float> ref_output_data{6, 7, 18, 19};

  Tensor input_tensor =
    makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
  Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
new file mode 100644
index 000000000..5d3dcde72
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Element-wise broadcastable minimum: output[i] = min(input1[i], input2[i]).
// The constructor only registers the operand/result tensors with the base Kernel.
Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
  : Kernel({input1, input2}, {output})
{
}
+
+void Minimum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Minimum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMinimum<float>();
+ break;
+ case DataType::U8:
+ evalMinimum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Minimum::evalMinimum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::min(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h
new file mode 100644
index 000000000..5ff4035b4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MINIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel computing the element-wise minimum of two tensors with numpy-style
// broadcasting. Supported element types at execute() time: FLOAT32 and U8.
class Minimum : public Kernel
{
public:
  Minimum(const Tensor *input1, const Tensor *input2, Tensor *output);

  // Accessors for the tensors registered in the constructor.
  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Typed evaluation shared by all supported element types.
  template <typename T> inline void evalMinimum() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp
new file mode 100644
index 000000000..9a143643f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MinimumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MinimumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  std::vector<int32_t> ref_output_shape{3, 1, 2};
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
new file mode 100644
index 000000000..bae1eac70
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params)
+ : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+void MirrorPad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
+void MirrorPad::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output)
+{
+ auto const input_dims = input.shape().num_dims();
+ auto const input_data = input.data<T>();
+ auto const paddings_data = paddings.data<int32_t>();
+ auto const output_data = output.data<T>();
+
+ auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+ auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+ auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+ auto const input_d = input.shape().dim(input_dims - 1);
+
+ auto const input_h_offset = input_d * input_w;
+ auto const input_b_offset = input_h_offset * input_h;
+
+ auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+ auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+ auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+ auto const output_d = output.shape().dim(input_dims - 1);
+
+ auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
+ auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
+ auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
+ auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+ auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
+ auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
+ auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
+ auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+ const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+ const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+ auto b) {
+ return d + w * input_d + h * input_h_offset + b * input_b_offset;
+ };
+
+ const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+ bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+ return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+ };
+
+ const T *in_ptr = input_data;
+ T *out_ptr = output_data;
+
+ for (int32_t b = 0; b < output_b; ++b)
+ {
+ for (int32_t h = 0; h < output_h; ++h)
+ {
+ for (int32_t w = 0; w < output_w; ++w)
+ {
+ for (int32_t d = 0; d < output_d; ++d)
+ {
+ if (b < left_b_pad || b >= output_b - right_b_pad || //
+ h < left_h_pad || h >= output_h - right_h_pad || //
+ w < left_w_pad || w >= output_w - right_w_pad || //
+ d < left_d_pad || d >= output_d - right_d_pad)
+ {
+ if (mode == MirrorPadMode::REFLECT)
+ {
+ *out_ptr++ = input_data[offset_index(
+ positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+ positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+ }
+ else
+ {
+ *out_ptr++ = input_data[offset_index(
+ symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+ symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+ }
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h
new file mode 100644
index 000000000..d3e6e858a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MirrorPad : public KernelWithParams<MirrorPadParams>
+{
+public:
+ MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644
index 000000000..740d8cb22
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+ {
+ MirrorPadParams params{};
+ params.mode = mode;
+
+ MirrorPad kernel(&input, &padding, &output, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+ }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+ Shape input_shape = {3, 1};
+ Shape padding_shape = {2, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+ std::vector<int> padding_data{1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+ std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
new file mode 100644
index 000000000..531fb4fa1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
+ : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+{
+}
+
+void Mul::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Mul::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Mul::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Mul::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Mul::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input1_scale * input2_scale / output_scale;
+
+ int32_t output_multiplier;
+ int output_shift;
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
+ int16_t input2_val) {
+ int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
+ output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
+ output = std::max(output, activation_min);
+ output = std::min(output, activation_max);
+ return static_cast<int16_t>(output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h
new file mode 100644
index 000000000..c0cf817df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MUL_H
+#define LUCI_INTERPRETER_KERNELS_MUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Mul : public KernelWithParams<MulParams>
+{
+public:
+ Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp
new file mode 100644
index 000000000..fc0e60614
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MulTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ dtype max_value = std::numeric_limits<dtype>::max();
+ dtype res_max = max_value - max_value % 10;
+
+ std::vector<std::vector<dtype>> test_outputs = {
+ {8, 0, 20, 0, 4, 30, //
+ 16, 0, 40, 3, 8, 0, //
+ 0, 0, 0, 6, 0, 0, //
+ 4, 0, 10, 9, 2, 0, //
+ 40, 0, 100, 0, 20, 150, //
+ 28, 0, 70, 0, 14, res_max},
+ {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+ {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0,
+ 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+ 70, res_max},
+ {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+ std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+ std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, SInt64) // Broadcasting Mul + RELU over 64-bit signed integer tensors.
+{
+  checkInteger<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(MulTest, SInt32) // Broadcasting Mul + RELU over 32-bit signed integer tensors.
+{
+  checkInteger<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(MulTest, SInt16) // Quantized (S16) Mul with broadcasting; RELU zeroes negative products.
+{
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; // each broadcasts against base_shape
+  std::vector<std::vector<int32_t>> ref_output_shapes{
+    {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+
+  std::vector<float> input1_data{-0.3f, 2.3f, 0.9f,  0.5f, 0.8f, -1.1f,
+                                 1.2f,  2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+  std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+  std::vector<std::vector<float>> ref_outputs = {
+    {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+     0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+     0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+    {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+    {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+     0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+     0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+    {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
+    Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
+    const float tolerance = output_tensor.scale() * 2; // allow up to two quantization steps of error
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorShape(output_tensor),
+                ::testing::ElementsAreArray(ref_output_shapes[i]))
+      << "With shape number " << i;
+    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+      << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs and different scales.
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
+    Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
+    const float tolerance = output_tensor.scale() * 2; // allow up to two quantization steps of error
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorShape(output_tensor),
+                ::testing::ElementsAreArray(ref_output_shapes[i]))
+      << "With shape number " << i;
+    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+      << "With shape number " << i;
+  }
+}
+
+TEST_F(MulTest, Input_Output_Type_NEG) // configure() must reject inputs of different element types.
+{
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Output_Type_NEG) // configure() must reject an output type differing from the inputs'.
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Input_Type_NEG) // U64 passes configure() but execute() must throw (unsupported type).
+{
+  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(MulTest, Invalid_Quantization_NEG) // S16 tensors built without scale/zero-point must be rejected.
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+  MulParams params{};
+  params.activation = Activation::NONE;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
new file mode 100644
index 000000000..c6fe08a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include "PALNeg.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} // single input, single output
+
+void Neg::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); // element-wise op: types must match
+
+  output()->resize(input()->shape()); // output shape mirrors the input exactly
+}
+
+void Neg::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type."); // only FLOAT32 is implemented so far
+  }
+}
+
+void Neg::evalFloat() const // delegates to the platform abstraction layer (PALNeg.h) routine
+{
+  luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h
new file mode 100644
index 000000000..69fa1a18e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NEG_H
+#define LUCI_INTERPRETER_KERNELS_NEG_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Neg : public Kernel // Element-wise Neg kernel; FLOAT32 only (see Neg.cpp).
+{
+public:
+  Neg(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override; // checks types match and resizes the output
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp
new file mode 100644
index 000000000..8b2bc1a82
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T> input_data, std::initializer_list<T> output_data) // runs Neg and checks data + shape
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>(); // map C++ type T to its loco DataType
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  Neg kernel(&input_tensor, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor); // allocation must happen after configure() resizes the output
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(NegTest, FloatSimple) // sign of every element is flipped; shape is preserved
+{
+  Check<float>(/*input_shape=*/{2, 3},
+               /*output_shape=*/{2, 3},
+               /*input_data=*/
+               {
+                 0.0f, 1.0f, 3.0f,  // Row 1
+                 1.0f, -1.0f, -2.0f, // Row 2
+               },
+               /*output_data=*/
+               {
+                 0.0f, -1.0f, -3.0f, // Row 1
+                 -1.0f, 1.0f, 2.0f,  // Row 2
+               });
+
+  SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
new file mode 100644
index 000000000..54e5eee34
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} // comparison kernel: two inputs, bool output
+
+void NotEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); // both operands must share a type
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); // comparisons always yield BOOL
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Precompute fixed-point multipliers/shifts used to rescale quantized operands in evalQuantized().
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); // throws if shapes are not broadcastable
+}
+
+void NotEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type."); // other dtypes are rejected at runtime
+  }
+}
+
+void NotEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape(); // pick the (slower) broadcasting path only when needed
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                    y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void NotEqual::evalInteger() const // T is int32_t or int64_t (see execute())
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                            getTensorShape(y()), y_data,
+                                                            getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                             getTensorShape(y()), y_data, getTensorShape(output()),
+                                             output_data);
+  }
+}
+
+void NotEqual::evalQuantized() const // compares U8 operands after rescaling them to a common fixed-point domain
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8; // extra precision bits applied before rescaling (TFLite convention)
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                               getTensorShape(y()), y_data,
+                                               getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h
new file mode 100644
index 000000000..d2aafe893
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class NotEqual : public Kernel // Element-wise x != y with broadcasting; supports FLOAT32, S32, S64, U8.
+{
+public:
+  NotEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; } // always DataType::BOOL
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Fixed-point rescale parameters for U8 inputs, computed once in configure().
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
new file mode 100644
index 000000000..45bf4022a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class NotEqualTest : public ::testing::Test // fixture: provides a fresh TestMemoryManager per test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(NotEqualTest, FloatSimple) // same-shape float comparison, no broadcasting
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1,   0,   -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1,  0,   1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true, // Row 1
+    true, false, true, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(NotEqualTest, FloatBroadcast) // {1,3} y broadcast across {4,3} x (name typo "Broardcast" fixed)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1,   0,   -1,  // Row 2
+    -1,  0,   1,   // Row 3
+    0.9, 0.7, 0.5, // Row 4
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  false, true,  // Row 1
+    true,  true,  true,  // Row 2
+    true,  true,  true,  // Row 3
+    false, false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) // same-shape integer NotEqual, incl. type extremes
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type; // the C++ type backing DType
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false}; // only the middle pair (2 vs -2) differs
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) // {3} y broadcast across {4,3} x
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type; // the C++ type backing DType
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -2, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value, -2, max_value, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true,  true,  // Row 1
+    true,  true,  false, // Row 2
+    true,  false, true,  // Row 3
+    false, false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(NotEqualTest, Int32) // S32: simple and broadcast paths
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(NotEqualTest, Int64) // S64: simple and broadcast paths
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(NotEqualTest, Uint8Quantized) // U8 inputs with different scales must still compare correctly
+{
+  std::vector<float> x_data{
+    0.5, 0.5, 0.7,  0.9, // Row 1
+    1,   0,   0.05, -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.5, 0.55, 0.5, // Row 1
+    -1,  0,   0.05, 1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true,  true, // Row 1
+    true, false, false, true, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); // deliberately different scale than x
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Uint8QuantizedBroadcast) // {1,1,4,1} y broadcast across {1,4,4,1} x, shared quant params
+{
+  std::vector<float> x_data{
+    0.4,  -0.8, 0.7,  0.3, // Row 1
+    -0.5, 0.1,  0,    0.5, // Row 2
+    1,    0,    0.05, -1,  // Row 3
+    -1,   0.05, 0,    1,   // Row 4
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  true,  true,  true,  // Row 1
+    true,  true,  false, true,  // Row 2
+    true,  true,  true,  true,  // Row 3
+    false, false, false, false, // Row 4
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Input_Type_Mismatch_NEG) // operands of different element types must be rejected
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Input_Output_Type_NEG) // a non-BOOL output must be rejected
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Float_Broadcast_NEG) // {2} vs {3} are not broadcastable
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int32_Broadcast_NEG) // {2} vs {3} are not broadcastable
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int64_Broadcast_NEG) // {2} vs {3} are not broadcastable
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..4d3e5f2ef
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+                       Tensor *output_tensor) // fills output with on/off values per the one-hot rule
+{
+  // define input shape and correct axis
+  auto const &input_shape = indices_tensor->shape();
+  axis = axis == -1 ? input_shape.num_dims() : axis; // -1 means "append the new axis last"
+
+  // TODO support other integer input types
+  auto const *indices = getTensorData<int32_t>(indices_tensor);
+  auto const on_value = getTensorData<T>(on_value_tensor)[0]; // scalar tensors (checked in configure)
+  auto const off_value = getTensorData<T>(off_value_tensor)[0];
+  auto *output = getTensorData<T>(output_tensor);
+
+  // prefix_dim_size == # of elements before the axis
+  // depth == # of elements per axis
+  // suffix_dim_size == # of elements after the axis
+  auto prefix_dim_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+  {
+    prefix_dim_size *= input_shape.dim(i);
+  }
+  assert(prefix_dim_size > 0);
+  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+  // View the indices as a matrix of size:
+  //   prefix_dim_size x suffix_dim_size
+  // View the output as a matrix of size:
+  //   prefix_dim_size x depth x suffix_dim_size
+  // Then the output is:
+  //   output(i, j, k) == (indices(i, k) == j) ? on : off
+  for (int32_t i = 0; i < prefix_dim_size; ++i)
+    for (int32_t j = 0; j < depth; ++j)
+      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+               const Tensor *off_value, Tensor *output, const OneHotParams &params)
+  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+  // Do nothing
+}
+
+void OneHot::configure()
+{
+  // check types
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32); // only S32 indices supported (see TODO above)
+  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape dependent parameters
+  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1); // on/off/depth must be scalars
+  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+  // define parameters that affect the output shape
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const &input_shape = indices()->shape();
+  auto const input_dims = input_shape.num_dims();
+  auto const axis = params().axis == -1 ? input_dims : params().axis; // normalize -1 to "last axis"
+
+  // define output shape
+  Shape output_shape(input_shape.num_dims() + 1); // output has one extra axis of size depth
+  {
+    for (int32_t d = 0; d < axis; ++d)
+      output_shape.dim(d) = input_shape.dim(d);
+
+    output_shape.dim(axis) = depth_value;
+
+    for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+      output_shape.dim(d) = input_shape.dim(d - 1);
+  }
+
+  // reshape output
+  output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const axis = params().axis; // may be -1; OneHotComputeImpl normalizes it
+
+  switch (output()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case loco::DataType::U8:
+      OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case loco::DataType::S16:
+      OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    default:
+      // TODO Support other data types
+      throw std::runtime_error("Not supported, yet!");
+      break;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// OneHot kernel: expands integer indices into a one-hot encoded tensor,
// inserting a new dimension of size `depth` at `params.axis`
// (axis == -1 means "append as the last dimension").
class OneHot : public KernelWithParams<OneHotParams>
{
public:
  OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
         const Tensor *off_value, Tensor *output, const OneHotParams &params);

  // Input accessors; order matches the constructor parameters.
  const Tensor *indices() const { return _inputs[0]; }
  const Tensor *depth() const { return _inputs[1]; }
  const Tensor *on_value() const { return _inputs[2]; }
  const Tensor *off_value() const { return _inputs[3]; }

  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
// Typed test fixture: BasicPattern below is instantiated once per output
// value type listed in DataTypes.
template <typename T> class OneHotTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
// Verifies OneHot for a 2x3 index tensor with depth 4 at every supported
// axis position (0, 1 and -1). Indices >= depth (5, 7) never match any
// depth slot, so their rows/columns contain only off_value.
TYPED_TEST(OneHotTest, BasicPattern)
{
  // axis 0
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/0,
                            /*output_data=*/
                            {
                              1, 0, 0, //
                              0, 0, 1, //

                              0, 0, 0, //
                              0, 0, 0, //

                              0, 0, 0, //
                              0, 0, 0, //

                              0, 1, 0, //
                              0, 1, 0, //
                            });
  // axis 1
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/1,
                            /*output_data=*/
                            {
                              1, 0, 0, //
                              0, 0, 0, //
                              0, 0, 0, //
                              0, 1, 0, //

                              0, 0, 1, //
                              0, 0, 0, //
                              0, 0, 0, //
                              0, 1, 0, //
                            });
  // axis -1
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/-1,
                            /*output_data=*/
                            {
                              1, 0, 0, 0, //
                              0, 0, 0, 1, //
                              0, 0, 0, 0, //

                              0, 0, 0, 0, //
                              0, 0, 0, 1, //
                              1, 0, 0, 0, //
                            });
}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // input type should be integer
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+ // type of on_value, off_value and output_tensor should be same
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
new file mode 100644
index 000000000..5a6b05c3a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
// Two inputs (input, per-element or broadcastable alpha), one output.
PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
  : Kernel({input, alpha}, {output})
{
}
+
PRelu::~PRelu()
{
  // Destructor declared to delete vector of alpha quantized data properly
  // (ChannelQuantMultipliers is only forward-declared in PRelu.h).
}
+
// Validates element types and quantization parameters, precomputes the
// fixed-point multipliers used by the quantized kernels, and resizes the
// output to the broadcast of input and alpha shapes.
void PRelu::configure()
{
  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
  LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
  // input/output must be layer-wise quantized (at most one scale).
  LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
  LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);

  if (input()->element_type() == DataType::U8)
  {
    LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
    // Single multiplier for the negative (alpha) branch...
    _alpha_multipliers.resize(1);
    double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
    quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
                       &_alpha_multipliers[0].shift);
    // ...and an identity multiplier for the non-negative branch (rescales
    // input to output quantization).
    double identity_multiplier = input()->scale() / output()->scale();
    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
  }
  else if (input()->element_type() == DataType::S16)
  {
    // Common check for correctness of quant params: S16 is symmetric,
    // so all zero points must be 0.
    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
    for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
    {
      LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
    }
    // PRelu specific checks for CWQ: alpha is channel-wise quantized over
    // its last dimension, one scale per channel, and covers exactly the
    // input's last dimension.
    LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
    LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
                           alpha()->shape().dim(alpha()->quantized_dimension()));
    LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
                           input()->shape().dim(input()->shape().num_dims() - 1));

    // all dimension of alpha except last one should be size 1
    for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
    {
      LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
    }

    // One fixed-point multiplier per alpha channel for the negative branch.
    std::vector<double> real_multipliers =
      getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());

    _alpha_multipliers = quantizeMultipliers(real_multipliers);

    double identity_multiplier = input()->scale() / output()->scale();
    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
  }
  output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
}
+
+void PRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void PRelu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto alpha_data = getTensorData<float>(alpha());
+ const auto size = getTensorShape(input()).FlatSize();
+ auto output_data = getTensorData<float>(output());
+
+ auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PReluFunc);
+ }
+ else
+ {
+ for (auto i = decltype(size){0}; i < size; ++i)
+ {
+ if (input_data[i] >= 0)
+ output_data[i] = input_data[i];
+ else
+ output_data[i] = input_data[i] * alpha_data[i];
+ }
+ }
+}
+
// U8 PReLU via the TFLite reference kernels, using the multipliers
// precomputed in configure(): multiplier_1 rescales the non-negative branch,
// multiplier_2 rescales the alpha (negative) branch.
void PRelu::evalQuantized() const
{
  tflite::PreluParams op_params{};

  op_params.input_offset = -input()->zero_point(); // Note the '-'.
  op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
  op_params.output_offset = output()->zero_point();
  op_params.output_shift_1 = _output_shift_identity;
  op_params.output_multiplier_1 = _output_multiplier_identity;
  op_params.output_shift_2 = _alpha_multipliers[0].shift;
  op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;

  // Pick the broadcasting variant only when the shapes differ.
  if (input()->shape() != alpha()->shape())
  {
    tflite::reference_ops::BroadcastPrelu4DSlow(
      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
  else
  {
    tflite::reference_ops::Prelu<uint8_t>(
      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
}
+
// Single-element S16 PReLU: rescale with the identity multiplier for
// non-negative inputs, or with the per-channel alpha multiplier applied to
// the int32 product input * alpha for negative inputs; clamp to int16 range.
static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
                                       const ChannelQuantMultipliers &identity_mult,
                                       const ChannelQuantMultipliers &alpha_mult)
{
  constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
  constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();

  // input_val * alpha_val is an int16 x int16 product, so it fits in int32.
  const int32_t output_val =
    input_val >= 0
      ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
                                              identity_mult.multiplier, identity_mult.shift)
      : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
                                              alpha_mult.multiplier, alpha_mult.shift);
  const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
  return clamped_output;
}
+
// S16 PReLU with channel-wise quantized alpha. Iterates the input as
// (outer dims) x (last dim), where the last dimension is the quantized
// channel dimension validated in configure().
void PRelu::evalQuantizedS16() const
{
  // Note that this kernel assumes alpha is CWQ
  tflite::RuntimeShape input_shape = getTensorShape(input());
  const int16_t *input_data = input()->data<int16_t>();
  const int16_t *alpha_data = alpha()->data<int16_t>();
  int16_t *output_data = output()->data<int16_t>();

  // Multiplier for the non-negative branch is shared by all channels.
  const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};

  const int last_dim = input()->shape().num_dims() - 1;

  // Flatten all dimensions before the channel dimension.
  int32_t outer_dims_size = 1;
  for (int i = 0; i < last_dim; ++i)
    outer_dims_size *= input_shape.Dims(i);
  int32_t quant_dim_size = input_shape.Dims(last_dim);

  for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
    for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
    {
      // Negative-branch multiplier is selected per channel.
      const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
      size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
      offset += quant_channel;

      output_data[offset] =
        evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
    }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h
new file mode 100644
index 000000000..f7735d418
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H
+#define LUCI_INTERPRETER_KERNELS_PRELU_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
// PReLU kernel: output = input where input >= 0, alpha * input otherwise
// (see the eval* implementations in PRelu.cpp).
class PRelu : public Kernel
{
public:
  PRelu(const Tensor *input, const Tensor *alpha, Tensor *output);

  // Out-of-line destructor so the vector of ChannelQuantMultipliers is
  // destroyed where the type is complete (only forward-declared here).
  ~PRelu();

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *alpha() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  void evalFloat() const;
  void evalQuantized() const;
  void evalQuantizedS16() const;

private:
  // Fixed-point multipliers for the alpha (negative) branch: one entry for
  // U8, one per channel for S16 CWQ. Filled in configure().
  std::vector<ChannelQuantMultipliers> _alpha_multipliers;
  // TODO merge this into one ChannelQuantMultiplier object
  int32_t _output_multiplier_identity = 0;
  int _output_shift_identity = 0;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp
new file mode 100644
index 000000000..6d97382de
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
// Elementwise (same-shape) float PReLU: positives pass through, negatives
// are scaled by the matching alpha element.
TEST(PReluTest, FloatSimple)
{
  Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3},
               /*output_shape=*/{2, 3},
               /*input_data=*/
               {
                 0.0f, 1.0f, 3.0f,   // Row 1
                 1.0f, -1.0f, -2.0f, // Row 2
               },
               /*alpha_data=*/
               {
                 0.0f, 0.5f, 0.1f, // Row 1
                 0.0f, 0.5f, 0.1f, // Row 2
               },
               /*output_data=*/
               {
                 0.0f, 1.0f, 3.0f,   // Row 1
                 1.0f, -0.5f, -0.2f, // Row 2
               });

  SUCCEED();
}
+
// Float PReLU with alpha broadcast over the input's leading dimensions.
TEST(PReluTest, FloatBroadcast)
{
  Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3},
               /*output_shape=*/{1, 2, 2, 3},
               /*input_data=*/
               {
                 0.0f, 0.0f, 0.0f,    // Row 1, Column 1
                 1.0f, 1.0f, 1.0f,    // Row 1, Column 2
                 -1.0f, -1.0f, -1.0f, // Row 2, Column 1
                 -2.0f, -2.0f, -2.0f, // Row 2, Column 2
               },
               /*alpha_data=*/
               {0.0f, 1.0f, 2.0f},
               /*output_data=*/
               {
                 0.0f, 0.0f, 0.0f,   // Row 1, Column 1
                 1.0f, 1.0f, 1.0f,   // Row 1, Column 2
                 0.0f, -1.0f, -2.0f, // Row 2, Column 1
                 0.0f, -2.0f, -4.0f, // Row 2, Column 2
               });

  SUCCEED();
}
+
// Maximum quantization error for a uint8 tensor covering [min, max]:
// one quantization step, i.e. the full range divided by 255 levels.
float GetTolerance(float min, float max)
{
  const float range = max - min;
  return range / 255.0;
}
+
// Same-shape U8 PReLU: dequantized output must match the float reference
// within one quantization step of the [-1, 1] range.
TEST(PReluTest, Uint8Simple)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
  std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
  std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};

  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
  // Input, alpha and output all share the same quantization parameters.
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);

  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));

  SUCCEED();
}
+
// Broadcast U8 PReLU; checks both the dequantized values and the exact raw
// quantized bytes produced by the kernel.
TEST(PReluTest, Uint8Broadcast)
{
  std::vector<float> input_data{
    0.0f, 0.0f, 0.0f,       // Row 1, Column 1
    0.5f, 0.5f, 0.5f,       // Row 1, Column 2
    -1.0f, -1.0f, -1.0f,    // Row 2, Column 1
    -0.25f, -0.25f, -0.25f, // Row 2, Column 2
  };
  std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
  std::vector<float> ref_output_data{
    0.0f, 0.0f, 0.0f,     // Row 1, Column 1
    0.5f, 0.5f, 0.5f,     // Row 1, Column 2
    0.0f, -0.5f, 0.5f,    // Row 2, Column 1
    0.0f, -0.125f, 0.125f // Row 2, Column 2
  };
  std::vector<float> ref_quant_output_data{
    128, 128, 128, // Row 1, Column 1
    192, 192, 192, // Row 1, Column 2
    128, 64, 192,  // Row 2, Column 1
    128, 112, 144  // Row 2, Column 2
  };
  float kQuantizedTolerance = 2 * (1. / 256);
  // Asymmetric range [-1, 127/128] gives zero-point 128 with scale 1/128.
  const float kMin = -1;
  const float kMax = 127.f / 128.f;
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);

  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::U8>(
    {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_quant_output_data));
}
+
// S16 alpha must be channel-wise quantized; a layer-wise quantized alpha is
// rejected by configure().
TEST(PReluTest, SInt16_LWQ_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  // Rewrite this test in case layer-wise quantization for sint16 is supported
  std::vector<float> input_data(6); // data is not important
  std::vector<float> alpha_data(6);

  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// S16 PReLU with channel-wise quantized alpha (one scale per channel of the
// last dimension); output is compared against the float reference.
TEST(PReluTest, SInt16_CWQ_Simple)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
  std::vector<float> alpha_data{0.5f, 0.25f};
  std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};

  std::vector<float> alpha_scales{0.05f, 0.025f};
  std::vector<int32_t> zerop{0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
// Alpha quantized over the last dimension but with spatial extent (dims
// before the channel dim are not all 1) must be rejected.
TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data(6); // data is not important
  std::vector<float> alpha_data(6);

  std::vector<float> alpha_scales{0.25f, 0.05f};
  std::vector<int32_t> zerop{0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// Alpha quantized over a dimension other than the last one (dim 1 here)
// must be rejected by configure().
TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data(6); // data is not important
  std::vector<float> alpha_data(6);

  std::vector<float> alpha_scales{0.25f};
  std::vector<int32_t> zerop{0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// CWQ alpha with a rank lower than the input (rank 3 vs rank 4) is still
// accepted as long as its last dimension matches the input's.
TEST(PReluTest, SInt16_CWQ_uneven_shape1)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
  std::vector<float> alpha_data{0.5f, 0.25f};
  std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};

  std::vector<float> alpha_scales{0.05f, 0.025f};
  std::vector<int32_t> zerop{0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
// CWQ alpha of shape {1, 1, 1, 3} broadcast over a {1, 2, 2, 3} input, with
// a distinct scale per channel (including a negative alpha value).
TEST(PReluTest, SInt16_CWQ_uneven_shape2)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{
    0.0f, 0.0f, 0.0f,       // Row 1, Column 1
    0.5f, 0.5f, 0.5f,       // Row 1, Column 2
    -1.0f, -1.0f, -1.0f,    // Row 2, Column 1
    -0.25f, -0.25f, -0.25f, // Row 2, Column 2
  };
  std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
  std::vector<float> ref_output_data{
    0.0f, 0.0f, 0.0f,     // Row 1, Column 1
    0.5f, 0.5f, 0.5f,     // Row 1, Column 2
    0.0f, -0.5f, 0.5f,    // Row 2, Column 1
    0.0f, -0.125f, 0.125f // Row 2, Column 2
  };

  std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
  std::vector<int32_t> zerop{0, 0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
+TEST(PReluTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Alpha_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
// S64 passes configure() (only type *matching* is checked there) but must
// throw in execute(), which only dispatches FLOAT32/U8/S16.
TEST(PReluTest, Invalid_Input_Type_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S64);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}
+
// U8 input/output with more than one scale (channel-wise quantization) is
// not supported; makeInputTensor is used for the output on purpose so CWQ
// params can be attached to it.
TEST(PReluTest, Input_Output_U8_CWQ_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> scales{1.f, 1.f};
  std::vector<int32_t> zerop{0, 0};
  std::vector<float> dummy_data(4, 0.f);
  Tensor input_tensor =
    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor output_tensor =
    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// S16 input/output with channel-wise quantization (multiple scales) is not
// supported either; only alpha may be CWQ.
TEST(PReluTest, Input_Output_S16_CWQ_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> scales{1.f, 1.f};
  std::vector<int32_t> zerop{0, 0};
  std::vector<float> dummy_data(4, 0.f);
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor output_tensor =
    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// Mixed U8 input and S16 alpha violates the type-equality checks in
// configure().
TEST(PReluTest, Mixing_U8_S16_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> dummy_data(4, 0.f);
  Tensor input_tensor =
    makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
  Tensor output_tensor =
    makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
new file mode 100644
index 000000000..42aab330c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pack kernel: stacks 'values_count' equally-shaped input tensors along a new
+// axis, producing one output of rank (input rank + 1).
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+  : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+// Validates inputs (count, dtype, matching shapes, quantization params) and
+// computes the output shape with the new axis inserted at 'axis'.
+void Pack::configure()
+{
+  // Number of supplied inputs must match the declared values_count.
+  LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+  const Tensor *t0 = _inputs[0];
+  // Output rank is input rank + 1 (one new axis is inserted).
+  const int dimension_size = t0->shape().num_dims() + 1;
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    axis += dimension_size; // normalize negative axis
+  }
+  LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+  if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+      t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+      t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  // Every input must share t0's element type and exact shape.
+  for (uint32_t i = 1; i < _inputs.size(); ++i)
+  {
+    const Tensor *tensor = _inputs[i];
+    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+    for (int d = 0; d < t0->shape().num_dims(); ++d)
+    {
+      LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+    }
+  }
+
+  // Build the output shape: values_count at 'axis', input dims elsewhere.
+  Shape output_shape(dimension_size);
+  int input_dim = 0;
+  for (int index = 0; index < dimension_size; ++index)
+  {
+    if (index == axis)
+    {
+      output_shape.dim(index) = params().values_count;
+    }
+    else
+    {
+      output_shape.dim(index) = t0->shape().dim(input_dim++);
+    }
+  }
+
+  if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+      t0->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+    LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee input/output quantization params match as we do not support
+    // packing quantized tensors.
+    // (Loop index renamed from 'i' to avoid shadowing the shape index above.)
+    for (int input_idx = 0; input_idx < params().values_count; input_idx++)
+    {
+      LUCI_INTERPRETER_CHECK(_inputs[input_idx]->zero_point() == t0->zero_point());
+      LUCI_INTERPRETER_CHECK(_inputs[input_idx]->scale() == t0->scale());
+    }
+  }
+
+  output()->resize(output_shape);
+}
+
+// Dispatches to the typed evalGeneric<T>() based on the first input's dtype.
+void Pack::execute() const
+{
+  switch (_inputs[0]->element_type())
+  {
+    case DataType::FLOAT32:
+      evalGeneric<float>();
+      break;
+    case DataType::U8:
+      evalGeneric<uint8_t>();
+      break;
+    case DataType::S8:
+      evalGeneric<int8_t>();
+      break;
+    case DataType::S16:
+      evalGeneric<int16_t>();
+      break;
+    case DataType::S32:
+      evalGeneric<int32_t>();
+      break;
+    case DataType::S64:
+      evalGeneric<int64_t>();
+      break;
+    default:
+      // configure() already rejects other dtypes; this is a safety net.
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Stacks all inputs along the normalized axis via the TFLite reference Pack op.
+template <typename T> void Pack::evalGeneric() const
+{
+  const Tensor *t0 = _inputs[0];
+  const int dimension_size = t0->shape().num_dims() + 1;
+  // Negative axis is re-normalized here, mirroring configure().
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    axis += dimension_size;
+  }
+
+  VectorOfTensors<T, true> inputs(_inputs);
+  tflite::PackParams params{};
+  params.axis = axis;
+  params.inputs_count = _inputs.size();
+  tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+                                 getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h
new file mode 100644
index 000000000..4a2fcfd80
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PACK_H
+#define LUCI_INTERPRETER_KERNELS_PACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pack kernel: stacks 'values_count' equally-shaped inputs along a new axis.
+class Pack : public KernelWithParams<PackParams>
+{
+public:
+  Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input(int index) const { return _inputs[index]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Typed implementation shared by all supported dtypes.
+  template <typename T> void evalGeneric() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PACK_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp
new file mode 100644
index 000000000..d16320b78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Builds input/output tensors of dtype T, runs the Pack kernel, and verifies
+// the packed data and output shape.
+// Quantization params: U8/S8 use {1/255, 128}; S16 uses {1.0, 0}; float and
+// wide integer types are unquantized.
+template <typename T>
+void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
+           std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
+           std::initializer_list<T> output_data, int32_t axis)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+  std::vector<const Tensor *> inputs(input_datas.size());
+  std::vector<Tensor> tmp_inputs;
+  // Reserve up front: a reallocation would copy/move Tensors whose buffers
+  // were already registered with the memory manager, invalidating them.
+  tmp_inputs.reserve(input_datas.size());
+  for (size_t i = 0; i < input_datas.size(); i++)
+  {
+    // Branch only on how the tensor is quantized; allocation and data
+    // writing are identical for every dtype and hoisted below.
+    if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
+        std::is_same<T, int64_t>::value)
+    {
+      tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+    }
+    else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+    {
+      tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+    }
+    else
+    {
+      assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested");
+      tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, ""));
+    }
+    memory_manager->allocate_memory(tmp_inputs[i]);
+    tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+  }
+  // Collect pointers only after all push_backs so they reference final storage.
+  for (size_t i = 0; i < input_datas.size(); i++)
+  {
+    inputs[i] = &tmp_inputs[i];
+  }
+
+  Tensor output_tensor = makeOutputTensor(element_type);
+  if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+  {
+    output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
+  }
+  else if (std::is_same<T, int16_t>::value)
+  {
+    output_tensor = makeOutputTensor(element_type, 1.0f, 0);
+  }
+
+  PackParams params{};
+  params.axis = axis;
+  params.values_count = input_datas.size();
+  Pack kernel(inputs, &output_tensor, params);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+// Typed test fixture instantiated for every dtype the Pack kernel supports.
+template <typename T> class PackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>;
+TYPED_TEST_SUITE(PackTest, DataTypes);
+
+// Packs three 1-D inputs along axis 0 -> output shape {3, 2}.
+TYPED_TEST(PackTest, ThreeInputs)
+{
+  Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+                   /*output_shape=*/{3, 2},
+                   /*input_datas=*/
+                   {{1, 4}, {2, 5}, {3, 6}},
+                   /*output_data=*/
+                   {1, 4, 2, 5, 3, 6}, /*axis=*/0);
+
+  SUCCEED();
+}
+
+// axis=-1 normalizes to the last axis -> output shape {2, 3}, data interleaved.
+TYPED_TEST(PackTest, NegAxis)
+{
+  Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+                   /*output_shape=*/{2, 3},
+                   /*input_datas=*/
+                   {{1, 4}, {2, 5}, {3, 6}},
+                   /*output_data=*/
+                   {1, 2, 3, 4, 5, 6}, /*axis=*/-1);
+
+  SUCCEED();
+}
+
+// NEG test: three inputs but values_count=2 must fail configure().
+TEST(Pack, MismatchingInputValuesCount_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input1_data{1, 4};
+  std::vector<float> input2_data{2, 5};
+  std::vector<float> input3_data{3, 6};
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  PackParams params{};
+  {
+    params.axis = 0;
+    params.values_count = 2; // deliberately inconsistent with 3 inputs
+
+    Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+    EXPECT_ANY_THROW(kernel.configure());
+  }
+}
+
+// NEG test: axis=2 is outside the valid range [0, rank] for rank-1 inputs.
+TEST(Pack, InvalidInputAxis_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input1_data{1, 4};
+  std::vector<float> input2_data{2, 5};
+  std::vector<float> input3_data{3, 6};
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  PackParams params{};
+  {
+    params.axis = 2; // deliberately out of range
+    params.values_count = 3;
+
+    Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+    EXPECT_ANY_THROW(kernel.configure());
+  }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
new file mode 100644
index 000000000..c07f6e310
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pad kernel: pads 'input' per the [N, 2] S32 'paddings' tensor.
+Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
+  : Kernel({input, paddings}, {output})
+{
+}
+
+// Validates inputs and computes the padded output shape.
+// NOTE(review): validation uses assert(), which is compiled out in NDEBUG
+// builds — unlike the LUCI_INTERPRETER_CHECK used by other kernels. Confirm
+// this is intentional before relying on these checks in release builds.
+void Pad::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    throw std::runtime_error("Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    // Row i of paddings holds [before, after] counts for dimension i.
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Runs the TFLite reference Pad. The fill value is 0.0f for float tensors and
+// the output zero point for quantized (U8/S8) tensors.
+void Pad::execute() const
+{
+  const int num_dims = input()->shape().num_dims();
+
+  tflite::PadParams params{};
+  params.left_padding_count = num_dims;
+  params.right_padding_count = num_dims;
+
+  // Copy [before, after] pairs into the TFLite params struct.
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = num_dims - 1; i >= 0; --i)
+  {
+    params.left_padding[i] = paddings_data[i * 2];
+    params.right_padding[i] = paddings_data[i * 2 + 1];
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      const float pad_value = 0.0f;
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<float>(output()));
+      break;
+    }
+    case DataType::U8:
+    {
+      // Zero point must fit in the quantized type before narrowing.
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+      const auto pad_value = static_cast<uint8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<uint8_t>(output()));
+      break;
+    }
+    case DataType::S8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+      const auto pad_value = static_cast<int8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<int8_t>(output()));
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h
new file mode 100644
index 000000000..e05b47f29
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_H
+#define LUCI_INTERPRETER_KERNELS_PAD_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pad kernel: pads 'input' per the [N, 2] 'paddings' tensor (see Pad.cpp).
+class Pad : public Kernel
+{
+public:
+  Pad(const Tensor *input, const Tensor *paddings, Tensor *output);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *paddings() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp
new file mode 100644
index 000000000..dd3ce947c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Max dequantization error for a [min, max] range quantized to 8 bits.
+float GetTolerance(float min, float max) { return (max - min) / 255.0; }
+
+// Pads a quantized U8 tensor; padded cells hold the zero point (0.0f after
+// dequantization).
+TEST(Pad, Uint8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+  std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
+                                     0, 0,    0,   0,   0, 0, 0, 0, 0,   0,   0,    0, 0, 0};
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+// Same as the U8 case but for S8 quantization.
+TEST(Pad, Int8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+  std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0};
+  Tensor input_tensor = makeInputTensor<DataType::S8>(
+    {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, 0, 0,    0,    0,    0, 0, 0, -0.2, 0.4, 0.5, 0,
+                                     0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7,  0.1, 0.2, 0,
+                                     0, 0, 0,    0,    0,    0, 0, 0, 0,    0,   0,   0};
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
+// Pads a float tensor; padded cells are filled with 0.0f.
+TEST(Pad, Float)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+  std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                     0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5,
+                                     6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
new file mode 100644
index 000000000..197cdaa69
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// PadV2 kernel: like Pad, but the fill value comes from the single-element
+// 'constant_values' tensor instead of being fixed at zero.
+PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
+             Tensor *output)
+  : Kernel({input, paddings, constant_values}, {output})
+{
+}
+
+// Validates inputs (including the scalar constant_values tensor) and computes
+// the padded output shape.
+// NOTE(review): assert()-based validation vanishes under NDEBUG; confirm this
+// matches the project's release-build expectations.
+void PadV2::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    throw std::runtime_error("Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  assert(constant_values()->element_type() == output()->element_type());
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+  // Constant values elements number should be 1.
+  assert(constant_values()->shape().num_elements() == 1);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    // Row i of paddings holds [before, after] counts for dimension i.
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Runs the TFLite reference Pad with the fill value read from the scalar
+// constant_values tensor.
+// NOTE(review): only FLOAT32 and U8 are handled here, while Pad::execute also
+// supports S8 — confirm whether the omission is intentional.
+void PadV2::execute() const
+{
+  const int num_dims = input()->shape().num_dims();
+
+  tflite::PadParams params{};
+  params.left_padding_count = num_dims;
+  params.right_padding_count = num_dims;
+
+  // Copy [before, after] pairs into the TFLite params struct.
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = num_dims - 1; i >= 0; --i)
+  {
+    params.left_padding[i] = paddings_data[i * 2];
+    params.right_padding[i] = paddings_data[i * 2 + 1];
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      const auto pad_value = getTensorData<float>(constant_values())[0];
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<float>(output()));
+      break;
+    }
+    case DataType::U8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+      const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<uint8_t>(output()));
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h
new file mode 100644
index 000000000..48a31f584
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H
+#define LUCI_INTERPRETER_KERNELS_PAD_V2_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// PadV2 kernel: pads 'input' with a caller-supplied scalar fill value.
+class PadV2 : public Kernel
+{
+public:
+  PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *paddings() const { return _inputs[1]; }
+  const Tensor *constant_values() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp
new file mode 100644
index 000000000..41efaff06
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Max dequantization error for a [min, max] range quantized to 8 bits.
+float GetTolerance(float min, float max) { return (max - min) / 255.0; }
+
+// Pads a quantized U8 tensor with constant value 0.5.
+TEST(PadV2, Uint8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+  std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+  std::vector<float> constant_values_data{0.5};
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor constant_values = makeInputTensor<DataType::U8>(
+    {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data = {
+    0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, //
+    0.5, 0.5,  0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  0.5, 0.5, 0.5}; //
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+// Pads a float tensor with constant value 7.
+TEST(PadV2, Float)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+  std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+  std::vector<float> constant_values_data{7};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor constant_values =
+    makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+                                     7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5,
+                                     6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
+  std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
new file mode 100644
index 000000000..722c64024
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Pow::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Pow::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>();
+ break;
+ case DataType::S32:
+ eval<int32_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pow::eval() const
+{
+ tflite::ArithmeticParams params{};
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h
new file mode 100644
index 000000000..8ff865e40
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_POW_H
+#define LUCI_INTERPRETER_KERNELS_POW_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pow : public Kernel
+{
+public:
+ Pow(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_POW_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp
new file mode 100644
index 000000000..0e858115d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class PowTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, FloatBroadcastPow)
+{
+ std::initializer_list<int32_t> input1_shape = {1, 3};
+ std::initializer_list<int32_t> input2_shape = {3, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 0.3f, 0.4f};
+ std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f,
+ 0.96888f, 0.6178f, 1.3953f, 0.9587f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(PowTest, IntPow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 3};
+
+ std::vector<int32_t> input_data{2, 3, 4};
+ std::vector<int32_t> test_outputs{4, 27, 256};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..0c8544a65
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+ int32_t multiplier;
+ int shift;
+
+ const double effective_output_scale = input->scale() / output->scale();
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type())
+ {
+ case loco::DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case loco::DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case loco::DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default:
+ throw std::runtime_error("Unsupported quantized type, yet!");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::S16);
+ break;
+ }
+ case loco::DataType::S16:
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S16);
+ if (output()->element_type() == loco::DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+
+ output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ tflite::QuantizationParams op_params;
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Quantize : public Kernel
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16)
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
new file mode 100644
index 000000000..747ec6cc8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void Relu::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ constexpr int32_t output_min = 0;
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t num_elements = input()->shape().num_elements();
+
+ for (int32_t i = 0; i < num_elements; ++i)
+ {
+ const int32_t input_val = input_data[i];
+ int32_t output_val =
+ tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+ output_val = std::max(output_val, output_min);
+ output_val = std::min(output_val, output_max);
+ output_data[i] = static_cast<int16_t>(output_val);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h
new file mode 100644
index 000000000..b813f0cdf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU_H
+#define LUCI_INTERPRETER_KERNELS_RELU_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu : public Kernel
+{
+public:
+ Relu(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp
new file mode 100644
index 000000000..bd32e3cc9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReluTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReluTest, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(ReluTest, Uint8Quantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 8;
+ const float f_max = (127.0 / 128.0) * 8;
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, Uint8Requantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 8;
+ const float in_max = (127.0 / 128.0) * 8;
+ const float out_min = (0.0 / 256.0) * 8;
+ const float out_max = (255.0 / 256.0) * 8;
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, SInt16)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 0, 2, 4, //
+ 3, 0, 7, 1, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(ReluTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ReluTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
new file mode 100644
index 000000000..07205ed3a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu6.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu6::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ if (input()->element_type() == DataType::U8)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu6::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu6::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu6::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max =
+ std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+ params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h
new file mode 100644
index 000000000..f5030b588
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H
+#define LUCI_INTERPRETER_KERNELS_RELU6_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu6 : public Kernel
+{
+public:
+ Relu6(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU6_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp
new file mode 100644
index 000000000..af7b3f3db
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class Relu6Test : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Relu6Test, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 7.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 6.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(Relu6Test, Uint8Quantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 10;
+ const float f_max = (127.0 / 128.0) * 10;
+ const float tolerance = (f_max - f_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Uint8Requantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 10;
+ const float in_max = (127.0 / 128.0) * 10;
+ const float out_min = (0.0 / 256.0) * 0;
+ const float out_max = (255.0 / 256.0) * 6;
+ const float tolerance = (in_max - in_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Relu6Test, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
new file mode 100644
index 000000000..61d3300b2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+static Shape extractShapeFromTensor(const Tensor *tensor)
+{
+ assert(tensor->element_type() == DataType::S32);
+ Shape shape(tensor->shape().num_elements());
+ const auto *shape_data = tensor->data<int32_t>();
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ {
+ shape.dim(i) = shape_data[i];
+ }
+ return shape;
+}
+
+static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape)
+{
+ const int32_t num_input_elements = input_shape.num_elements();
+ int32_t num_output_elements = 1;
+ int unknown_dim_index = -1;
+ for (int i = 0; i < output_shape->num_dims(); ++i)
+ {
+ const int32_t value = output_shape->dim(i);
+ if (value == -1)
+ {
+ assert(unknown_dim_index == -1);
+ unknown_dim_index = i;
+ }
+ else
+ {
+ num_output_elements *= value;
+ }
+ }
+ if (unknown_dim_index != -1)
+ {
+ output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements;
+ num_output_elements *= output_shape->dim(unknown_dim_index);
+ }
+ assert(num_output_elements == num_input_elements);
+}
+
+Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
+ : Kernel({input, shape}, {output})
+{
+}
+
+void Reshape::configure()
+{
+ Shape output_shape = extractShapeFromTensor(shape());
+ resolveUnknownDimension(input()->shape(), &output_shape);
+ output()->resize(output_shape);
+}
+
+void Reshape::execute() const
+{
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
new file mode 100644
index 000000000..99b947f77
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H
+#define LUCI_INTERPRETER_KERNELS_RESHAPE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Reshape : public Kernel
+{
+public:
+ Reshape(const Tensor *input, const Tensor *shape, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *shape() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
new file mode 100644
index 000000000..c2ff3ea1b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReshapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// TODO Test types other than FLOAT32.
+
+TEST_F(ReshapeTest, Regular)
+{
+ Shape input_shape{1, 2, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{2};
+ std::vector<int32_t> shape_data{3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, UnknownDimension)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int32_t> shape_data{2, -1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644
index 000000000..e2ddd6a7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+ const ResizeBilinearParams &params)
+ : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeBilinear::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ if (params().half_pixel_centers && params().align_corners)
+ throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeBilinear::execute() const
+{
+ tflite::ResizeBilinearParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
new file mode 100644
index 000000000..b7bdc2ab7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeBilinear : public KernelWithParams<ResizeBilinearParams>
+{
+public:
+ ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeBilinearParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
new file mode 100644
index 000000000..933a1128c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+  // The TFLite example uses the uint8 values themselves, which corresponds to a quant param
+  // scale of 1.0f and a zero point of 0.
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeBilinearTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
+
+TYPED_TEST(ResizeBilinearTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
+ },
+ false, false);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterFloatTest)
+{
+ Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
+ },
+ {3, 3},
+ {
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
+{
+ Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 12, 16 //
+ },
+ {3, 3},
+ {
+ 2, 4, 6, //
+ 6, 7, 9, //
+ 9, 10, 12, //
+ 4, 7, 10, //
+ 8, 10, 13, //
+ 12, 14, 16, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, InvalidParams_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = true;
+ params.half_pixel_centers = true;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..306cefbc2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+ Tensor *output,
+ const ResizeNearestNeighborParams &params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+ tflite::ResizeNearestNeighborParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+      tflite::reference_ops::ResizeNearestNeighbor(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
new file mode 100644
index 000000000..137d031cf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams>
+{
+public:
+ ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeNearestNeighborParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..7ade02a6f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
+
+TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
+ },
+ false, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ true, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ false, true);
+}
+
+TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644
index 000000000..1b6a5cc3b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ throw std::runtime_error("Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ throw std::runtime_error("Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ throw std::runtime_error("Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void ReverseV2::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
new file mode 100644
index 000000000..51211c703
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReverseV2 : public Kernel
+{
+public:
+ ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
new file mode 100644
index 000000000..c0025faca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseV2Test : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
+
+TYPED_TEST(ReverseV2Test, MultiDimensions)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // TypeParam
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 000000000..6dd92dc98
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Rsqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = 1.f / std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index 000000000..adc5bcfa2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel
+{
+public:
+ Rsqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 000000000..3c6494232
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..40d79aaa3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+ switch (activation)
+ {
+ case luci::FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case luci::FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case luci::FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case luci::FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case luci::FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+ params)
+{
+ // Do nothing
+}
+
+void SVDF::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+ input()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+ weight_time()->element_type() == loco::DataType::S8);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+ input_activation_state()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+    // Note: TFLite currently supports only the ReLU activation for integer SVDF
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+ weight_feature()->element_type() == loco::DataType::S8) &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+    // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+  // Check that all tensor parameters are consistent with one another and with
+  // the input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0);
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize scratchpad_activation_state to match input_activation_state's shape
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
+ luci_interpreter_pal::SetupScratchpadTensor(
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+ switch (weight_feature()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ evalFloat();
+ break;
+ case loco::DataType::S8:
+ {
+ if (input()->element_type() == loco::DataType::S8)
+ evalInteger();
+ else
+      // TODO: support hybrid SVDF op
+      throw std::runtime_error("Hybrid type is not currently supported");
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+void SVDF::evalInteger() const
+{
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to zero,
+  // and this variable tensor is expected to have no allocated buffer
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to zero,
+  // and this variable tensor is expected to have no allocated buffer
+ auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad_1 = getOutputTensors()[2];
+
+ luci_interpreter_pal::FloatSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+ getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; }
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+ 0.35867718, 0.36897406, 0.73463392};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+ -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+ std::vector<float> ref_output_shape{batches, units};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size};
+ Shape weight_feature_shape{num_filters, right_input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..0429fe1e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+ : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+ output()->element_type() == DataType::S64);
+ const auto input_shape = input()->shape();
+
+ Shape output_shape(1);
+ output_shape.dim(0) = input_shape.num_dims();
+
+ output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+ switch (params().out_type)
+ {
+ case DataType::S32:
+ evalInt<int32_t>();
+ break;
+ case DataType::S64:
+ evalInt<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
+ const auto input_shape = input()->shape();
+
+ auto output_data = getTensorData<T>(output());
+
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ {
+ output_data[i] = input_shape.dim(i);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 000000000..cfaadec91
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H
+#define LUCI_INTERPRETER_KERNELS_SHAPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ShapeKernel : public KernelWithParams<ShapeParams>
+{
+public:
+ ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalInt() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..4763e016c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Helper: runs ShapeKernel on a fixed FLOAT32 input of shape {1, 3, 1, 3, 5}
+// with output type 'dataType' (element type T must match it), then checks
+// that the output holds the input dimensions and itself has shape {5}.
+template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager)
+{
+  Shape input_shape{1, 3, 1, 3, 5};
+
+  // Input data is never read by SHAPE; only the shape matters, so the
+  // tensor is created without allocating/filling a buffer.
+  Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+  Tensor output_tensor = makeOutputTensor(dataType);
+
+  ShapeParams params{};
+  params.out_type = dataType;
+
+  ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<T> ref_output_data{1, 3, 1, 3, 5};
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+  std::vector<int32_t> ref_output_shape{5};
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Positive test: SHAPE supports both supported integer output types.
+TEST_F(ShapeTest, OutTypeInt)
+{
+
+  // Run for int32_t output
+  runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get());
+  // Run for int64_t output
+  runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get());
+
+  SUCCEED();
+}
+
+// Negative test: a FLOAT32 output type must be rejected at configure time.
+TEST_F(ShapeTest, Invalid_Output_Type_NEG)
+{
+  Shape input_shape{1, 3};
+
+  Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  ShapeParams params{};
+  params.out_type = loco::DataType::FLOAT32;
+
+  ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
new file mode 100644
index 000000000..2fe2c5471
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include "PALSlice.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+const int max_dim = 4;
+
+// SLICE kernel: inputs are (data, begin indices, slice sizes), one output.
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+  : Kernel({input, begin, size}, {output})
+{
+}
+
+// Computes the output shape of SLICE from the 'begin' and 'size' tensors.
+// T is the index element type (int32_t or int64_t). A size of -1 means
+// "slice to the end of the dimension"; any other negative size throws.
+// For non-negative sizes, begin + size must not exceed the input dimension.
+// NOTE(review): the -1 branch does not validate begin[idx] itself, so an
+// out-of-range begin would yield a negative dim here — confirm upstream
+// validation covers that case.
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+  Shape output_shape = Shape(input->shape().num_dims());
+  for (int idx = 0; idx < input->shape().num_dims(); idx++)
+  {
+    T size_value = getTensorData<T>(size)[idx];
+    if (size_value < 0)
+    {
+      if (size_value != -1)
+      {
+        throw std::runtime_error("Invalid size.");
+      }
+      // -1: take everything from begin to the end of this dimension.
+      size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
+    }
+    else
+    {
+      if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
+      {
+        throw std::runtime_error("Invalid begin and size.");
+      }
+    }
+    output_shape.dim(idx) = static_cast<int>(size_value);
+  }
+  return output_shape;
+}
+
+// Appends begin/size values to the output vectors in *reverse* dimension
+// order (innermost dimension first). Slice::execute() re-reverses them
+// (begins[3 - i]) when filling tflite::SliceParams, so the two must stay
+// in sync.
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+                            std::vector<int> *begins, std::vector<int> *sizes)
+{
+  for (int idx = dimensions - 1; idx >= 0; --idx)
+  {
+    begins->push_back(getTensorData<T>(begin)[idx]);
+    sizes->push_back(getTensorData<T>(size)[idx]);
+  }
+}
+
+// Validates operand types/ranks (begin/size are 1-D S32 or S64, input has at
+// most max_dim (4) dimensions) and resizes the output to the computed slice
+// shape.
+void Slice::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+  assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+  assert(begin()->shape().num_dims() == 1);
+  assert(size()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() <= max_dim);
+
+  // Dispatch on the index element type of 'begin' (assumed to match 'size').
+  if (begin()->element_type() == DataType::S32)
+  {
+    output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Runs the slice: gathers begin/size as ints (innermost-first), pads the
+// missing dimensions up to 4-D with no-op slices, then dispatches to the PAL
+// implementation for the supported element types.
+void Slice::execute() const
+{
+  std::vector<int> begins;
+  begins.reserve(max_dim);
+  std::vector<int> sizes;
+  sizes.reserve(max_dim);
+  if (begin()->element_type() == DataType::S32)
+  {
+    getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported begin type.");
+  }
+  // Pad to exactly max_dim entries: begin 0 / size 1 leaves padded dims
+  // untouched.
+  for (int i = input()->shape().num_dims(); i < max_dim; ++i)
+  {
+    begins.push_back(0);
+    sizes.push_back(1);
+  }
+
+  assert(begins.size() == 4);
+  assert(sizes.size() == 4);
+  tflite::SliceParams op_params{};
+  op_params.begin_count = 4;
+  op_params.size_count = 4;
+  for (int i = 0; i < 4; i++)
+  {
+    // Vectors were filled innermost-first; re-reverse into the natural
+    // outermost-first order expected by tflite::SliceParams.
+    op_params.begin[i] = begins[3 - i];
+    op_params.size[i] = sizes[3 - i];
+  }
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                  getTensorData<uint8_t>(output()));
+      break;
+    case DataType::S8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<int8_t>(input()), getTensorShape(output()),
+                                  getTensorData<int8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported input type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h
new file mode 100644
index 000000000..23c359608
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H
+#define LUCI_INTERPRETER_KERNELS_SLICE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the SLICE operator: extracts a contiguous sub-tensor described
+// by per-dimension 'begin' indices and 'size' extents (-1 size = to the end).
+class Slice : public Kernel
+{
+public:
+  Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *begin() const { return _inputs[1]; }
+  const Tensor *size() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp
new file mode 100644
index 000000000..517982990
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Typed fixture: the same slice scenario is run once per element type below.
+template <typename T> class SliceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
+
+// Slices a {3, 2, 3, 1} tensor starting at batch 1, taking 2 batches, 1 row,
+// all columns (-1) and 1 channel; verifies data and the {2, 1, 3, 1} shape.
+TYPED_TEST(SliceTest, SimpleTest)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
+  Shape input_shape{3, 2, 3, 1};
+  std::vector<int32_t> begin_data{1, 0, 0, 0};
+  Shape begin_shape{4};
+  std::vector<int32_t> size_data{2, 1, -1, 1};
+  Shape size_shape{4};
+  std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
+  std::vector<int32_t> output_shape{2, 1, 3, 1};
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+  Tensor begin_tensor =
+    makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+              ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
new file mode 100644
index 000000000..c230aaa70
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include "PALSoftmax.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// SOFTMAX kernel: single input, single output; beta comes from params.
+Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
+  : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+{
+}
+
+// Validates types and quantization constraints, precomputes the quantized
+// lookup table, and sizes the output like the input.
+// Quantized constraints: U8 output must have zero_point 0; S8 output must
+// have zero_point == numeric_limits<int8_t>::min() (the two checks below
+// each short-circuit for the other type).
+void Softmax::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
+  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
+                           output()->zero_point() == std::numeric_limits<int8_t>::min());
+    // op_params.table aliases the member _table, so the PAL call fills the
+    // member in place; evalQuantized() reuses it at execute time.
+    tflite::SoftmaxParams op_params{};
+    op_params.table = _table;
+    luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
+  }
+  output()->resize(input()->shape());
+}
+
+// Dispatches to the float or quantized evaluation path by element type.
+void Softmax::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S8:
+      evalQuantized<int8_t>();
+      break;
+    case DataType::U8:
+      evalQuantized<uint8_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference softmax with params().beta.
+void Softmax::evalFloat() const
+{
+  tflite::SoftmaxParams op_params{};
+  op_params.beta = params().beta;
+
+  tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                 getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Quantized path (T = int8_t or uint8_t): reuses the lookup table built in
+// configure() (const_cast only strips constness of this const method's view
+// of the member; the PAL is not expected to write through it).
+template <typename T> void Softmax::evalQuantized() const
+{
+  tflite::SoftmaxParams op_params{};
+  op_params.table = const_cast<float *>(_table);
+  op_params.zero_point = output()->zero_point();
+  op_params.scale = output()->scale();
+  luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
+  luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
+                                getTensorShape(output()), getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
new file mode 100644
index 000000000..1f281df1c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the SOFTMAX operator. Supports FLOAT32 plus quantized U8/S8,
+// for which a 256-entry lookup table (_table) is precomputed in configure().
+class Softmax : public KernelWithParams<SoftmaxParams>
+{
+public:
+  Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalQuantized() const;
+
+  // Lookup table filled by configure() for the quantized path (one entry
+  // per possible 8-bit input value).
+  float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
new file mode 100644
index 000000000..08e70672d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Maps a C++ element type to the corresponding loco::DataType at compile
+// time; only the three specializations below are defined.
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+// Floating-point overload (selected by SFINAE): runs Softmax on float data
+// and compares against the reference output with a float tolerance.
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Integral overload (selected by SFINAE): quantizes input/output ranges
+// (widened to include 0 so the zero point is representable), runs the
+// quantized kernel, and compares the dequantized result within one scale.
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+                          std::max<float>(std::max<float>(input_data), 0.f));
+  std::pair<float, int32_t> output_quant_param =
+    quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+                          std::max<float>(std::max<float>(output_data), 0.f));
+  Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+                                                             input_quant_param.second, input_data,
+                                                             memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+// Typed fixture: Simple below runs once per element type (float + quantized).
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+// Checks softmax (beta = 0.1) over the last axis of a {2, 1, 2, 3} tensor;
+// each row of 3 expected values sums to ~1.
+TYPED_TEST(SoftmaxTest, Simple)
+{
+  Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+                   {
+                     5, -9, 8,  //
+                     -7, 2, -4, //
+                     1, -2, 9,  //
+                     3, -6, -1, //
+                   },
+                   {
+                     0.38514, 0.09497, 0.51989, //
+                     0.20792, 0.51141, 0.28067, //
+                     0.25212, 0.18678, 0.56110, //
+                     0.48149, 0.19576, 0.32275, //
+                   });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
new file mode 100644
index 000000000..630cd38c4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+// SPACE_TO_BATCH_ND accepts only 3-D or 4-D inputs.
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+// SPACE_TO_BATCH_ND kernel: inputs are (data, block_shape, paddings).
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+                               const Tensor *paddings, Tensor *output)
+  : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+// Validates operand shapes and computes the output shape:
+// each padded spatial dimension must divide evenly by its block size; the
+// batch dimension grows by the product of block sizes; the channel (last)
+// dimension is unchanged.
+void SpaceToBatchND::configure()
+{
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *paddings_data = paddings()->data<int32_t>();
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // Spatial dims exclude the leading batch and trailing channel dimensions.
+  int spatial_dims_num = input()->shape().num_dims() - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  // paddings is {spatial_dims_num, 2}: [before, after] per spatial dim.
+  LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape = Shape(input()->shape().num_dims());
+  int output_batch_size = input()->shape().dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    int final_dim_size =
+      (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+    LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+    output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+    output_batch_size = output_batch_size * block_shape_data[i];
+  }
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(input()->shape().num_dims() - 1) =
+    input()->shape().dim(input()->shape().num_dims() - 1);
+  output()->resize(output_shape);
+}
+
+// Dispatches to the PAL SpaceToBatchND implementation for FLOAT32 and U8.
+// For quantized U8 data the padded regions are filled at the output's zero
+// point instead of literal 0.
+void SpaceToBatchND::execute() const
+{
+  // Shared by both branches; only output_offset differs. Declared before
+  // the switch: a statement placed between 'switch {' and the first 'case'
+  // label is never executed, so the original in-switch declaration only
+  // worked because its initialization is vacuous.
+  tflite::SpaceToBatchParams op_params;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      op_params.output_offset = 0;
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<float>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      op_params.output_offset = output()->zero_point();
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
new file mode 100644
index 000000000..0893003bb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the SPACE_TO_BATCH_ND operator: pads the spatial dimensions,
+// then rearranges spatial blocks into the batch dimension.
+class SpaceToBatchND : public Kernel
+{
+public:
+  SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+                 Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *block_shape() const { return _inputs[1]; }
+  const Tensor *paddings() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
new file mode 100644
index 000000000..3a8b0a812
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Generic helper: runs SpaceToBatchND on data of element type T and checks
+// the exact output values and shape. Specialized below for uint8_t.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+           std::initializer_list<int32_t> block_shape_shape,
+           std::initializer_list<int32_t> paddings_shape,
+           std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+           std::initializer_list<int32_t> block_shape_data,
+           std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// uint8_t specialization: quantizes the input range, runs the quantized
+// kernel with identical input/output quantization, and compares the
+// dequantized result within one scale unit.
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+  std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+  std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+  SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Typed fixture: Simple below runs for float and quantized uint8_t.
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
+
+// {1, 5, 2, 1} input with blocks {3, 2} and paddings {1, 0, 2, 0} yields a
+// {6, 2, 2, 1} output; padded positions are zero.
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+  Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+                   /*paddings_shape=*/{2, 2},
+                   /*output_shape=*/{6, 2, 2, 1},
+                   /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+                   /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+                   /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+                                    0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+// Negative test: 3x3 spatial dims with block 2x2 and no padding are not
+// evenly divisible, so configure() must throw.
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
new file mode 100644
index 000000000..7c29e8cb0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include "PALSpaceToDepth.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+void SpaceToDepth::configure()
+{
+ assert(input()->shape().num_dims() == 4);
+ assert(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+ output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+ assert(input()->element_type() == output()->element_type());
+
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ int32_t output_height = input_height / block_size;
+ int32_t output_width = input_width / block_size;
+
+ assert(input_height == output_height * block_size);
+ assert(input_width == output_width * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+
+ output()->resize(output_shape);
+}
+
+void SpaceToDepth::execute() const
+{
+ tflite::SpaceToDepthParams op_params{};
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h
new file mode 100644
index 000000000..e66316b11
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToDepth : public KernelWithParams<SpaceToDepthParams>
+{
+public:
+ SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
new file mode 100644
index 000000000..4af488618
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToDepth.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SpaceToDepthTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
+
+TYPED_TEST(SpaceToDepthTest, SimpleCase)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<TypeParam>();
+ std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
+ Shape input_shape{1, 2, 2, 2};
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
+ std::vector<int32_t> output_shape{1, 1, 1, 8};
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToDepthParams params{};
+ params.block_size = 2;
+
+ SpaceToDepth kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
new file mode 100644
index 000000000..1a563f307
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+
+#include "Utils.h"
+
+#include "PALSplit.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
+ : Kernel({axis, input}, std::move(outputs))
+{
+}
+
+void Split::configure()
+{
+ assert(axis()->shape().num_elements() == 1);
+ _axis_value = getTensorData<int32_t>(axis())[0];
+ if (_axis_value < 0)
+ _axis_value += input()->shape().num_dims();
+ assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+ const int32_t input_size = input()->shape().dim(_axis_value);
+ assert(input_size % _outputs.size() == 0);
+ const int32_t slice_size = input_size / _outputs.size();
+
+ Shape output_shape = input()->shape();
+ output_shape.dim(_axis_value) = slice_size;
+ for (Tensor *output : _outputs)
+ {
+ output->resize(output_shape);
+ }
+}
+
+void Split::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.h b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h
new file mode 100644
index 000000000..9542b1e56
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Split : public Kernel
+{
+public:
+ Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs);
+
+ const Tensor *axis() const { return _inputs[0]; }
+ const Tensor *input() const { return _inputs[1]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp
new file mode 100644
index 000000000..283cd9aa9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Split.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_splits; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(output_data[i]));
+ }
+}
+
+template <typename T> class SplitTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SplitTest, DataTypes);
+
+TYPED_TEST(SplitTest, FourDimensional)
+{
+ Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 9, 10, 11, 12}, //
+ {5, 6, 7, 8, 13, 14, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 5, 6, 9, 10, 13, 14}, //
+ {3, 4, 7, 8, 11, 12, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 3, 5, 7, 9, 11, 13, 15}, //
+ {2, 4, 6, 8, 10, 12, 14, 16}, //
+ });
+}
+
+TYPED_TEST(SplitTest, OneDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+ {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+}
+
+TYPED_TEST(SplitTest, NegativeAxis)
+{
+ Check<TypeParam>(
+ /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16},
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644
index 000000000..aa6820889
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs)
+ : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  auto num_split = static_cast<int32_t>(_outputs.size());
+  auto sizes_data = getTensorData<int32_t>(size_splits());
+
+  assert(size_splits()->shape().num_dims() == 1);
+
+  int32_t sum = 0;
+  const auto num_size_splits = size_splits()->shape().dim(0);
+  int32_t count_neg_dim = 0;
+
+  // Sum ALL explicit sizes; a single -1 entry (wherever it is) is the wildcard.
+  for (int32_t i = 0; i < num_size_splits; ++i)
+  {
+    if (sizes_data[i] != -1)
+    {
+      sum += sizes_data[i];
+    }
+    else
+    {
+      count_neg_dim++;
+    }
+  }
+  assert(count_neg_dim < 2); // at most one -1 wildcard is allowed
+  assert(size_splits()->shape().num_elements() == num_split);
+
+  auto output_shape = input()->shape();
+  for (int32_t i = 0; i < num_split; ++i)
+  {
+    if (sizes_data[i] == -1)
+    {
+      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+    }
+    else
+    {
+      output_shape.dim(_axis_value) = sizes_data[i];
+    }
+    _outputs[i]->resize(output_shape);
+  }
+}
+
+void SplitV::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ case DataType::S16:
+ TF_LITE_SPLIT(int16_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h
new file mode 100644
index 000000000..92f6288fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SplitV : public Kernel
+{
+public:
+ SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size_splits() const { return _inputs[1]; }
+ const Tensor *axis() const { return _inputs[2]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644
index 000000000..035bc2122
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, std::initializer_list<int32_t> splits_size,
+ std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+
+ auto num_splits = static_cast<int32_t>(splits_size.size());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor sizes_tensor =
+ makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+  for (int i = 0; i < num_splits; ++i)
+  {
+    // Verify each split output against its expected slice.
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+  }
+}
+
+template <typename T> class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8, 9}, //
+ {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 10, 11, 12, 19, 20, 21}, //
+ {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 4, 7, 10, 13, 16, 19, 22, 25}, //
+ {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644
index 000000000..46e9fc9ad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644
index 000000000..4034655ed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel
+{
+public:
+ Sqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644
index 000000000..96835fbfc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
new file mode 100644
index 000000000..bc71905c1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Square::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Square::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Square::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = (*i) * (*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.h b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h
new file mode 100644
index 000000000..73ed5a707
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H
+#define LUCI_INTERPRETER_KERNELS_SQUARE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Square : public Kernel
+{
+public:
+ Square(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp
new file mode 100644
index 000000000..51662dea7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquareTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Square kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
new file mode 100644
index 000000000..3bafeba4a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h
new file mode 100644
index 000000000..9327caf93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
new file mode 100644
index 000000000..2819c01e2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
new file mode 100644
index 000000000..4a75518c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
+void Squeeze::configure()
+{
+ int input_num_dims = input()->shape().num_dims();
+ int num_squeeze_dims = params().squeeze_dims.size();
+ assert(input_num_dims <= 8);
+ bool should_squeeze[8] = {false};
+ int num_squeezed_dims = 0;
+ if (num_squeeze_dims == 0)
+ {
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ if (input()->shape().dim(idx) == 1)
+ {
+ should_squeeze[idx] = true;
+ ++num_squeezed_dims;
+ }
+ }
+ }
+ else
+ {
+ for (int idx = 0; idx < num_squeeze_dims; ++idx)
+ {
+ int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
+ : params().squeeze_dims[idx];
+ assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
+ if (!should_squeeze[current])
+ ++num_squeezed_dims;
+ should_squeeze[current] = true;
+ }
+ }
+ Shape output_shape(input_num_dims - num_squeezed_dims);
+ for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
+ {
+ if (!should_squeeze[in_idx])
+ {
+ output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
+ }
+ }
+ output()->resize(output_shape);
+}
+
+void Squeeze::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+ std::memcpy(output_data, input_data,
+ getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h
new file mode 100644
index 000000000..687af5158
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Squeeze : public KernelWithParams<SqueezeParams>
+{
+public:
+ Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
new file mode 100644
index 000000000..1bc0b6459
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data,
+ std::initializer_list<int32_t> squeeze_dims)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SqueezeParams params{};
+ params.squeeze_dims = squeeze_dims;
+
+ Squeeze kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class SqueezeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
+
+TYPED_TEST(SqueezeTest, TotalTest)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
new file mode 100644
index 000000000..a8730d861
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
+ const Tensor *strides, Tensor *output, const StridedSliceParams &params)
+ : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+{
+}
+
+void StridedSlice::configure()
+{
+ assert(begin()->shape().num_dims() == 1);
+ assert(end()->shape().num_dims() == 1);
+ assert(strides()->shape().num_dims() == 1);
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32);
+ assert(end()->element_type() == DataType::S32);
+ assert(strides()->element_type() == DataType::S32);
+ assert(input()->shape().num_dims() <= 4);
+ if (params().ellipsis_mask != 0)
+ {
+ throw std::runtime_error("ellipsis_mask is not implemented yet.");
+ }
+ if (params().new_axis_mask != 0)
+ {
+ throw std::runtime_error("new_axis_mask is not implemented yet.");
+ }
+ if (input()->element_type() == DataType::U8)
+ {
+ assert(input()->scale() == output()->scale());
+ assert(input()->zero_point() == output()->zero_point());
+ }
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
+ std::vector<int32_t> output_shape_vector;
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ int idx = input()->shape().num_dims() - i - 1;
+ int32_t stride = getTensorData<int32_t>(strides())[idx];
+ assert(stride != 0);
+ int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
+ int32_t end =
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+
+ const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
+ if (shrink_axis)
+ {
+ end = begin + 1;
+ }
+
+ int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
+ {
+ output_shape_vector.push_back(dim_shape);
+ }
+ }
+ Shape output_shape = Shape(output_shape_vector.size());
+ for (size_t i = 0; i < output_shape_vector.size(); i++)
+ {
+ output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
+ }
+ output()->resize(output_shape);
+}
+
+void StridedSlice::execute() const
+{
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<int32_t>(input()), getTensorShape(output()),
+ getTensorData<int32_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h
new file mode 100644
index 000000000..fc96893a7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+#define LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class StridedSlice : public KernelWithParams<StridedSliceParams>
+{
+public:
+ StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides,
+ Tensor *output, const StridedSliceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *end() const { return _inputs[2]; }
+ const Tensor *strides() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
new file mode 100644
index 000000000..399cdebed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(StridedSliceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(StridedSliceTest, Uint8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
new file mode 100644
index 000000000..24b6a72e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/Utils.h"
+
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
+ : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+{
+}
+
+void Sub::configure()
+{
+ LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
+ LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Sub::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sub::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Sub::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Sub::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h
new file mode 100644
index 000000000..23952b3bd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SUB_H
+#define LUCI_INTERPRETER_KERNELS_SUB_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sub : public KernelWithParams<SubParams>
+{
+public:
+ Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp
new file mode 100644
index 000000000..9abafd49a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+using std::pair;
+using std::vector;
+using std::transform;
+using std::initializer_list;
+
+class SubTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// for quantized Sub, the error shouldn't exceed step
+float GetTolerance(float min, float max)
+{
+ float kQuantizedStep = (max - min) / 255.0;
+ return kQuantizedStep;
+}
+
+TEST_F(SubTest, Uint8)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> output_data = {
+ {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
+ 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
+ {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
+ {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
+ {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
+
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+
+ // Inversion step for output_data, because subtraction is not a commutative operation
+ auto multiply = [](auto &i) {
+ transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; });
+ };
+ for_each(output_data.begin(), output_data.end(), multiply);
+
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(SubTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> test_outputs = {
+ {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
+ 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
+
+ vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0,
+ 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+ {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+ {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+ 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+ {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+};
+
+TEST_F(SubTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644
index 000000000..c4fa16912
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Tanh::evalFloat() const
+{
+ tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Tanh::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = std::tanh(dequantized);
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h
new file mode 100644
index 000000000..8017c9638
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Tanh : public Kernel
+{
+public:
+ Tanh(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+ void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+ uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644
index 000000000..bfae479a9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class TanhTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(TanhTest, Float)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(TanhTest, Uint8)
+{
+ float kMin = -1;
+ float kMax = 127.f / 128.f;
+ float kTanhTolerance = 2 * (1. / 256);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(TanhTest, InputTypeInvalid_NEG)
+{
+ std::vector<int64_t> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(TanhTest, InputOutputMismatch_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
new file mode 100644
index 000000000..4d983adda
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
+
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
+{
+ return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor)
+{
+ if (tensor.element_type() == DataType::U8)
+ {
+ std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ if (tensor.element_type() == DataType::S8)
+ {
+ std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ else if (tensor.element_type() == DataType::S16)
+ {
+ // S16 quantization is symmetric, so zero point should be zero.
+ for (auto zp : tensor.zero_points())
+ {
+ (void)zp;
+ assert(zp == 0);
+ }
+
+ std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
+ if (tensor.scales().size() == 1)
+ {
+ return dequantize(data.data(), data.size(), tensor.scale(), 0);
+ }
+
+ // quantized_dimension breaks the shape into two parts:
+ // inner dimensions that contain contiguous data with one quantization type
+ // outer dimensions that contain the other dimensions
+ const Shape shape = tensor.shape();
+ const int32_t quantized_dimension = tensor.quantized_dimension();
+ assert(quantized_dimension < shape.num_dims());
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == tensor.scales().size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<float> dequantized_data;
+ dequantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ float scale = tensor.scales()[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<float> part_dequantized_data =
+ dequantize(data.data() + offset, inner_dims_size, scale, 0);
+ dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
+ part_dequantized_data.end());
+ }
+ return dequantized_data;
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
+{
+ std::vector<Matcher<float>> matchers;
+ matchers.reserve(values.size());
+ for (const float v : values)
+ {
+ matchers.emplace_back(FloatNear(v, max_abs_error));
+ }
+ return ElementsAreArray(matchers);
+}
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor)
+{
+ std::vector<int32_t> result;
+ int dims = tensor.shape().num_dims();
+ for (int i = 0; i < dims; i++)
+ {
+ result.push_back(tensor.shape().dim(i));
+ }
+ return result;
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h
new file mode 100644
index 000000000..1f5a0c308
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
+
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
+ IMemoryManager *memory_manager)
+{
+ Tensor tensor(DT, shape, {}, "");
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
+ return tensor;
+}
+
+/**
+ * @brief Create layer-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scale scale of quantized number
+ * @param zero_point zero point of quantized number, should be 0 for signed datatypes
+ * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
+ * @return created tensor
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
+ std::vector<NativeT> quantized_data =
+ quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+/**
+ * @brief Create channel-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scales scales of quantized number
+ * @param zero_points zero points of quantized number, should be 0 for signed datatypes
+ * @param quantized_dimension dimension to apply quantization along. Usually channels/output channels
+ * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
+ * @return created tensor
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
+ const std::vector<int32_t> &zero_points, int quantized_dimension,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ assert(quantized_dimension < shape.num_dims());
+ Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, "");
+
+  // quantized_dimension splits the shape into two parts:
+  // inner dimensions, which hold contiguous data sharing a single quantization parameter set
+  // outer dimensions, which cover all remaining dimensions
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == scales.size());
+ assert(quant_dim_size == zero_points.size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<NativeT> quantized_data;
+ quantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ int32_t zero_point = zero_points[channel];
+ float scale = scales[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<NativeT> part_quantized_data =
+ quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
+ quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
+ part_quantized_data.end());
+ }
+ assert(quantized_data.size() == shape.num_elements());
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+Tensor makeOutputTensor(DataType element_type);
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point);
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor);
+
+// Returns the corresponding DataType given the type T.
+template <typename T> constexpr DataType getElementType()
+{
+ if (std::is_same<T, float>::value)
+ return DataType::FLOAT32;
+ if (std::is_same<T, double>::value)
+ return DataType::FLOAT64;
+ if (std::is_same<T, uint8_t>::value)
+ return DataType::U8;
+ if (std::is_same<T, uint16_t>::value)
+ return DataType::U16;
+ if (std::is_same<T, uint32_t>::value)
+ return DataType::U32;
+ if (std::is_same<T, uint64_t>::value)
+ return DataType::U64;
+ if (std::is_same<T, int8_t>::value)
+ return DataType::S8;
+ if (std::is_same<T, int16_t>::value)
+ return DataType::S16;
+ if (std::is_same<T, int32_t>::value)
+ return DataType::S32;
+ if (std::is_same<T, int64_t>::value)
+ return DataType::S64;
+ if (std::is_same<T, bool>::value)
+ return DataType::BOOL;
+ return DataType::Unknown;
+}
+
+template <typename T> std::vector<T> extractTensorData(const Tensor &tensor)
+{
+ const auto *data_ptr = tensor.data<T>();
+ return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements());
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor);
+
+// Array version of `::testing::FloatNear` matcher.
+::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values,
+ float max_abs_error = 1.0e-5f);
+
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+
+ float q_min{}, q_max{};
+ if (std::is_signed<T>::value)
+ {
+ q_min = -std::numeric_limits<T>::max();
+ q_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ q_min = 0;
+ q_max = std::numeric_limits<T>::max();
+ }
+
+ std::vector<T> q;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const auto &f = data[i];
+ q.push_back(static_cast<T>(
+ std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
+ }
+ return q;
+}
+
+template <typename T>
+std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+ std::vector<float> f;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const T &q = data[i];
+ f.push_back(scale * (q - zero_point));
+ }
+ return f;
+}
+
+// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned).
+template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+ int32_t zero_point = 0;
+ float scale = 0;
+ const T qmin = std::numeric_limits<T>::lowest();
+ const T qmax = std::numeric_limits<T>::max();
+ const float qmin_double = qmin;
+ const float qmax_double = qmax;
+ // 0 should always be a representable value. Let's assume that the initial
+ // min,max range contains 0.
+ assert(f_max >= 0);
+ assert(f_min <= 0);
+ if (f_min == f_max)
+ {
+ // Special case where the min,max range is a point. Should be {0}.
+ assert(f_max == 0);
+ assert(f_min == 0);
+ return {scale, zero_point};
+ }
+
+ // General case.
+ //
+ // First determine the scale.
+ scale = (f_max - f_min) / (qmax_double - qmin_double);
+
+ // Zero-point computation.
+ // First the initial floating-point computation. The zero-point can be
+ // determined from solving an affine equation for any known pair
+ // (real value, corresponding quantized value).
+ // We know two such pairs: (rmin, qmin) and (rmax, qmax).
+ // The arithmetic error on the zero point computed from either pair
+ // will be roughly machine_epsilon * (sum of absolute values of terms)
+ // so we want to use the variant that adds the smaller terms.
+ const float zero_point_from_min = qmin_double - f_min / scale;
+ const float zero_point_from_max = qmax_double - f_max / scale;
+
+ const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
+
+ const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
+
+ const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
+ ? zero_point_from_min
+ : zero_point_from_max;
+
+ // Now we need to nudge the zero point to be an integer
+ // (our zero points are integer, and this is motivated by the requirement
+ // to be able to represent the real value "0" exactly as a quantized value,
+ // which is required in multiple places, for example in Im2col with SAME
+ // padding).
+
+ T nudged_zero_point = 0;
+ if (zero_point_double < qmin_double)
+ {
+ nudged_zero_point = qmin;
+ }
+ else if (zero_point_double > qmax_double)
+ {
+ nudged_zero_point = qmax;
+ }
+ else
+ {
+ nudged_zero_point = static_cast<T>(std::round(zero_point_double));
+ }
+
+  // The zero point should always be in the range of quantized values,
+  // i.e. [qmin, qmax].
+ assert(qmax >= nudged_zero_point);
+ assert(qmin <= nudged_zero_point);
+ zero_point = nudged_zero_point;
+ // finally, return the values
+ return {scale, zero_point};
+}
+
+inline float getTolerance(float min, float max, int quantize_steps)
+{
+ return ((max - min) / quantize_steps);
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
new file mode 100644
index 000000000..802d87295
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
+ : Kernel({input, perm}, {output})
+{
+}
+
+void Transpose::configure()
+{
+ // Transpose op only supports 1D-4D input arrays.
+ int dims = input()->shape().num_dims();
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+
+ assert(input()->shape().num_dims() <= 4);
+ assert(input()->element_type() == output()->element_type());
+
+ assert(perm()->shape().num_dims() == 1);
+ assert(perm()->shape().dim(0) == dims);
+
+ Shape output_shape(dims);
+ for (int i = 0; i < dims; i++)
+ {
+ assert(perm_data[i] < dims && perm_data[i] >= 0);
+ output_shape.dim(i) = input()->shape().dim(perm_data[i]);
+ }
+
+ output()->resize(output_shape);
+}
+
+void Transpose::execute() const
+{
+ tflite::TransposeParams params{};
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+ const int32_t size = perm()->shape().dim(0);
+ params.perm_count = size;
+ for (int i = 0; i < size; i++)
+ params.perm[i] = perm_data[i];
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h
new file mode 100644
index 000000000..d6f89c352
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Transpose : public Kernel
+{
+public:
+ Transpose(const Tensor *input, const Tensor *perm, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *perm() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp
new file mode 100644
index 000000000..43be8f8b9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class TransposeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
+
+TYPED_TEST(TransposeTest, Small3D)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3, 4}, /*perm_shape=*/{3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+ /*perm_data=*/{2, 0, 1},
+ /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21,
+ 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23});
+}
+
+TYPED_TEST(TransposeTest, Large4D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
+}
+
+TYPED_TEST(TransposeTest, Large2D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
new file mode 100644
index 000000000..1b5f9d941
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+ {output, scratch_tensor}, params)
+{
+}
+
+TransposeConv::~TransposeConv()
+{
+  // Define destructor here, to delete vector of quantized multipliers properly
+}
+
+void TransposeConv::configure()
+{
+ assert(output_shape()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() == 4);
+ assert(filter()->shape().num_dims() == 4);
+ assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+ input()->element_type() == DataType::S16);
+ assert(input()->element_type() == output()->element_type());
+ assert(input()->shape().dim(3) == filter()->shape().dim(3));
+
+ const int num_dims = output_shape()->shape().dim(0);
+ Shape out_shape(num_dims);
+ const auto *shape_data = getTensorData<int32_t>(output_shape());
+ for (int i = 0; i < num_dims; i++)
+ out_shape.dim(i) = shape_data[i];
+ output()->resize(out_shape);
+
+ const int32_t filter_height = filter()->shape().dim(1);
+ const int32_t filter_width = filter()->shape().dim(2);
+ const int32_t output_height = out_shape.dim(1);
+ const int32_t output_width = out_shape.dim(2);
+
+ const int32_t unused_output_height =
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ const int32_t unused_output_width =
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+ _padding_height =
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ _padding_width =
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->resize(output()->shape());
+ const std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ _quant_multipliers = quantizeMultipliers(real_multipliers);
+ }
+ else
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->set_allocatable(false);
+ }
+}
+
+void TransposeConv::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void TransposeConv::evalFloat() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<float>(input()), //
+ getTensorShape(filter()), getTensorData<float>(filter()), //
+ getTensorShape(bias()), getTensorData<float>(bias()), //
+ getTensorShape(output()), getTensorData<float>(output()), //
+ tflite::RuntimeShape(), nullptr);
+}
+
+void TransposeConv::evalQuantized() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ // The kernel expects input and filter zero points to be negated.
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier = _quant_multipliers[0].multiplier;
+ op_params.output_shift = _quant_multipliers[0].shift;
+ op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+ auto scratch_tensor = getOutputTensors()[1];
+
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<uint8>(input()), //
+ getTensorShape(filter()), getTensorData<uint8>(filter()), //
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), //
+ getTensorShape(output()), getTensorData<uint8>(output()), //
+ tflite::RuntimeShape(), nullptr, //
+ getTensorData<int32_t>(scratch_tensor));
+}
+
+void TransposeConv::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void TransposeConv::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h
new file mode 100644
index 000000000..cea0cf3c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class TransposeConv : public KernelWithParams<TransposeConvParams>
+{
+public:
+ TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params);
+
+ ~TransposeConv();
+
+ const Tensor *output_shape() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *input() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+ // The scaling factor from input to output (aka the 'real multiplier') can
+ // be represented as a fixed point multiplier plus a left shift.
+ std::vector<ChannelQuantMultipliers> _quant_multipliers;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
new file mode 100644
index 000000000..4856e1b87
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T, typename B>
+void Check(std::initializer_list<int32_t> output_shape_shape,
+ std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
+ std::initializer_list<T> input_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+ int32_t stride_width)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+ Tensor weight_tensor =
+ makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+ Tensor input_data_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ TransposeConvParams params{};
+ params.padding = padding;
+ params.stride_height = stride_height;
+ params.stride_width = stride_width;
+
+ if (bias_data.size() != 0)
+ {
+ Tensor bias_tensor =
+ makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ else
+ {
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+TEST(TransposeConvTest, FloatSimple)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, FloatTwoFiltersTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, UInt8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+ 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, UInt8_CWQ)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int32_t output_channels = 2;
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16_CWQ_weights)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int output_channels = 2;
+ const Shape input_shape{1, 2, 2, 1};
+ const Shape filter_shape{output_channels, 3, 3, 1};
+ const Shape bias_shape{output_channels};
+ std::vector<int32_t> output_shape_data{1, 5, 5, output_channels};
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ const float input_scale = 0.25;
+ const float output_scale = 0.5;
+ const std::vector<float> filter_scales{0.2f, 0.5f};
+ std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
+ const std::vector<int32_t> zerop(2, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
new file mode 100644
index 000000000..9127241c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+void Unpack::configure()
+{
+ const Shape &input_shape = input()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += input()->shape().num_dims();
+ assert(axis >= 0 && axis < input_shape.num_dims());
+
+ Shape output_shape(input_shape.num_dims() - 1);
+ int out_index = 0;
+ for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
+ {
+ if (in_index != axis)
+ output_shape.dim(out_index++) = input_shape.dim(in_index);
+ }
+
+ for (Tensor *output : _outputs)
+ {
+ assert(output->element_type() == input()->element_type());
+ output->resize(output_shape);
+ }
+}
+
+template <typename T> void Unpack::executeImpl() const
+{
+ tflite::UnpackParams params{};
+ params.axis = _params.axis;
+ params.num_split = _outputs.size();
+ VectorOfTensors<T, false> all_outputs(_outputs);
+ tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
+ **all_outputs.shapes(), all_outputs.data());
+}
+
+void Unpack::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ return executeImpl<float>();
+ case DataType::U8:
+ return executeImpl<uint8_t>();
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h
new file mode 100644
index 000000000..f4a44ecad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H
+#define LUCI_INTERPRETER_KERNELS_UNPACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Unpack : public KernelWithParams<UnpackParams>
+{
+public:
+ Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void executeImpl() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp
new file mode 100644
index 000000000..9384ddc83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
+ const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
+ std::vector<std::initializer_list<T>> exp_output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
+
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensors.push_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ UnpackParams params{};
+ params.axis = axis;
+
+ Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
+ kernel.configure();
+ for (int i = 0; i < num_outputs; i++)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(exp_output_data[i]));
+ }
+}
+
+template <typename T> class UnpackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
+
+TYPED_TEST(UnpackTest, ThreeOutputs)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsAxisOne)
+{
+ Check<TypeParam>(/*axis=*/1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne)
+{
+ Check<TypeParam>(/*axis=*/-1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo)
+{
+ Check<TypeParam>(/*axis=*/-2, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, OneOutput)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{1, 6},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{6}},
+ /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs)
+{
+ Check<TypeParam>(/*axis=*/2, /*input_shape=*/{2, 2, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8},
+ /*exp_output_shape=*/{{2, 2}, {2, 2}},
+ /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}});
+}
+
+TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
+{
+ Check<TypeParam>(
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+}
+
+TYPED_TEST(UnpackTest, VectorToScalar)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{5},
+ /*input_data=*/{1, 2, 3, 4, 5},
+ /*exp_output_shape=*/{{}, {}, {}, {}, {}},
+ /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
new file mode 100644
index 000000000..5d8e5db83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
+{
+ switch (activation)
+ {
+ case Activation::NONE:
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU:
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = -1;
+ *activation_max = 1;
+ break;
+ case Activation::RELU6:
+ *activation_min = 0;
+ *activation_max = 6;
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+ const Tensor *output, int32_t *activation_min,
+ int32_t *activation_max)
+{
+ const float scale = output->scale();
+ const int32_t zero_point = output->zero_point();
+
+ auto quantize = [scale, zero_point](float x) {
+ return zero_point + static_cast<int32_t>(std::round(x / scale));
+ };
+
+ switch (activation)
+ {
+ case Activation::NONE:
+ case Activation::TANH:
+ *activation_min = qmin;
+ *activation_max = qmax;
+ break;
+ case Activation::RELU:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = qmax;
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = std::max(qmin, quantize(-1.0f));
+ *activation_max = std::min(qmax, quantize(1.0f));
+ break;
+ case Activation::RELU6:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = std::min(qmax, quantize(6.0f));
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ assert(output->zero_points().size() == 1);
+ int32_t qmin{};
+ int32_t qmax{};
+ switch (output->element_type())
+ {
+ case DataType::U8:
+ qmin = 0;
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case DataType::S8:
+ qmin = -std::numeric_limits<int8_t>::max();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ case DataType::S16:
+ // For now, assume that signed int16 type implies signed symmetric quantization.
+ assert(output->zero_point() == 0);
+ qmin = -std::numeric_limits<int16_t>::max();
+ qmax = std::numeric_limits<int16_t>::max();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
+ activation_max);
+}
+
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.0)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
+
+ if (q_fixed == (INT64_C(1) << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+ // We could conceivably handle values in the range (roughly) [32, 63]
+ // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+ // the present handling is just doing 'flush denormals to zero'. We could
+ // reconsider and actually generate nonzero denormals if a need arises.
+ if (*shift < -31)
+ {
+ *shift = 0;
+ q_fixed = 0;
+ }
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier < 1.0);
+ assert(double_multiplier > 0.0);
+ int shift;
+ quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+ assert(shift <= 0);
+ *left_shift = shift;
+}
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
+{
+ const int num_input1_dims = input1_shape.num_dims();
+ const int num_input2_dims = input2_shape.num_dims();
+ const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
+ Shape output_shape(num_out_dims);
+
+ for (int i = 0; i < num_out_dims; ++i)
+ {
+ const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
+ const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
+
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
+ output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
+ }
+
+ return output_shape;
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
new file mode 100644
index 000000000..ebeb20e66
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "core/KernelParams.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <tensorflow/lite/kernels/internal/types.h>
+
+#include <cassert>
+#include <cstdint>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+#define LUCI_INTERPRETER_CHECK(cond)                                                             \
+  if (!(cond))                                                                                   \
+    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" +      \
+                             std::string(#cond) + ") was not true.");
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
+
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+ int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
+ total_padding = total_padding > 0 ? total_padding : 0;
+ *offset = total_padding % 2;
+ return total_padding / 2;
+}
+
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+ int32_t stride, int32_t dilation_rate = 1)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case Padding::SAME:
+ return (image_size + stride - 1) / stride;
+ case Padding::VALID:
+ return (image_size + stride - effective_filter_size) / stride;
+ default:
+ assert(false);
+ return 0;
+ }
+}
+
+inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
+{
+ return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max);
+
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {U,Other} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+ return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
+
+/**
+ * Fills activation min and max parameters depending on given data type and activation
+ *
+ * T is a template parameter, so after optimization this code is left with only the required case
+ *
+ * @tparam T data type of arithmetic operation output tensor
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  // NOTE the branches must be mutually exclusive: without 'else if' the float case
+  // would also fall into the trailing 'else' and overwrite the int64 range fields.
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of NEGATIVE its exponent ---
+// this is intended as a RIGHT-shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
+
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+ float output_scale)
+{
+ const double input_product_scale = static_cast<double>(input_scale * filter_scale);
+ LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
+ return input_product_scale / static_cast<double>(output_scale);
+}
+
+// TODO rename getQuantizedConvolutionMultiplers to something more general
+// it is used for non conv operators too
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+ const std::vector<float> &filter_scale,
+ float output_scale)
+{
+ std::vector<double> effective_output_scales;
+ size_t n = filter_scale.size();
+ effective_output_scales.reserve(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ effective_output_scales.push_back(
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ }
+ return effective_output_scales;
+}
+
+struct ChannelQuantMultipliers
+{
+ int shift;
+ int32_t multiplier;
+ ChannelQuantMultipliers() = default;
+};
+
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+ size_t n = effective_scale.size();
+ std::vector<ChannelQuantMultipliers> params(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+ }
+ return params;
+}
+
+// Helper wrapper to hide broadcast logic
+template <typename T> class BroadcastableWrapper
+{
+public:
+ BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
+
+ T operator[](int idx) { return _v[idx * _stride]; }
+
+private:
+ const std::vector<T> &_v;
+ int _stride;
+};
+
+inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
+{
+ if (tensor == nullptr)
+ return tflite::RuntimeShape();
+
+ const Shape &shape = tensor->shape();
+ tflite::RuntimeShape runtime_shape(shape.num_dims());
+ for (int i = 0; i < shape.num_dims(); ++i)
+ {
+ runtime_shape.SetDim(i, shape.dim(i));
+ }
+ return runtime_shape;
+}
+
+template <typename T> const T *getTensorData(const Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+template <typename T> T *getTensorData(Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+ using ElementT = typename std::conditional<is_const, const T, T>::type;
+ using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+ {
+ const int num_tensors = tensor_list.size();
+
+ all_data_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
+
+ for (TensorT *tensor : tensor_list)
+ {
+ all_data_.push_back(getTensorData<T>(tensor));
+ all_shape_.push_back(getTensorShape(tensor));
+ }
+
+ // Taking the pointer from inside a std::vector is only OK if the vector is
+ // never modified, so we populate all_shape in the previous loop and then we
+ // are free to grab iterators here.
+ for (tflite::RuntimeShape &shape : all_shape_)
+ {
+ all_shape_ptr_.push_back(&shape);
+ }
+ }
+ // Return a pointer to the data pointers of all tensors in the list. For
+ // example:
+ // float* const* f = v.data();
+ // f[0][1] is the second element of the first tensor.
+ ElementT *const *data() const { return all_data_.data(); }
+
+ // Return a pointer the shape pointers of all tensors in the list. For
+ // example:
+ // const RuntimeShape* const* d = v.dims();
+ // dims[1] are the dimensions of the second tensor in the list.
+ const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+ std::vector<ElementT *> all_data_;
+ std::vector<tflite::RuntimeShape> all_shape_;
+ std::vector<tflite::RuntimeShape *> all_shape_ptr_;
+};
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+ using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ {
+ for (TensorT *tensor : tensor_list)
+ {
+ zero_point_.push_back(tensor->zero_point());
+ scale_.push_back(tensor->scale());
+ }
+ }
+
+ const float *scale() const { return scale_.data(); }
+ const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+ std::vector<int32_t> zero_point_;
+ std::vector<float> scale_;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp
new file mode 100644
index 000000000..153bd1a99
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/While.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+void copy(const std::vector<const Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ for (size_t i = 0; i < src.size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type());
+ dst[i]->resize(src[i]->shape());
+
+ const int32_t num_elements = src[i]->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(src[i]->element_type());
+ std::memcpy(dst[i]->data<void>(), src[i]->data<void>(), num_elements * element_size);
+ }
+}
+
+void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ std::vector<const Tensor *> const_src;
+ for (const auto &t : src)
+ const_src.push_back(t);
+ copy(const_src, dst);
+}
+
+// TODO: Think about how allocate memory for output in main graph
+void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph)
+{
+ for (auto tensor : tensors)
+ run_graph->configureAllocations(tensor);
+}
+
+} // namespace
+
+While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *cond_graph, RuntimeGraph *body_graph)
+ : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph)
+{
+}
+
+void While::configure()
+{
+  LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size());
+  LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getOutputTensors().size());
+  LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size());
+
+  LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size());
+
+  const auto &cond_outputs = _cond_graph->getOutputTensors();
+  LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1);
+  LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL);
+}
+
+/**
+ * @note Dynamic shape such as {1, 0, 8} may fail in tensor->data()
+ */
+void While::execute() const
+{
+ const auto &cond_inputs = _cond_graph->getInputTensors();
+ const auto &cond_outputs = _cond_graph->getOutputTensors();
+
+ configureTensorsAllocations(cond_inputs, _cond_graph);
+
+ copy(getInputTensors(), cond_inputs);
+
+ const auto &body_inputs = _body_graph->getInputTensors();
+ const auto &body_outputs = _body_graph->getOutputTensors();
+
+ configureTensorsAllocations(body_inputs, _body_graph);
+
+ while (true)
+ {
+ _cond_graph->execute();
+
+ bool cond_value = cond_outputs[0]->data<bool>()[0];
+ if (!cond_value)
+ break;
+
+ copy(cond_inputs, body_inputs);
+
+ _body_graph->execute();
+
+ copy(body_outputs, cond_inputs);
+ }
+
+ copy(cond_inputs, getOutputTensors());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.h b/compiler/luci-micro/luci-interpreter/src/kernels/While.h
new file mode 100644
index 000000000..f758df3f3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H
+#define LUCI_INTERPRETER_KERNELS_WHILE_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class While : public Kernel
+{
+public:
+ While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, RuntimeGraph *cond_graph,
+ RuntimeGraph *body_graph);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _cond_graph = nullptr;
+ RuntimeGraph *const _body_graph = nullptr;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_WHILE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp
new file mode 100644
index 000000000..cb8f89130
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/Less.h"
+#include "kernels/While.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ graph->addKernel(std::make_unique<Less>(input, input_cond, output));
+
+ return graph;
+}
+
+RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input, input_add, output, params));
+
+ return graph;
+}
+
+TEST(WhileTest, FloatLoop10)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get());
+ Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *cond_graph =
+ buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get());
+ RuntimeGraph *body_graph =
+ buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get());
+
+ While kernel({&input}, {&output}, cond_graph, body_graph);
+ kernel.configure();
+ memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
new file mode 100644
index 000000000..292771592
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(SOURCES
+ GraphLoader.h
+ GraphLoader.cpp
+ KernelBuilderHelper.h
+ KernelBuilderHelper.cpp
+ KernelBuilder.h
+ KernelBuilder.cpp
+ ModuleLoader.h
+ ModuleLoader.cpp
+ RuntimeToIR.h
+ nodes/Builders.h)
+
+# include kernel specific builders
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "nodes/${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+set(TEST_SOURCES KernelBuilder.test.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
new file mode 100644
index 000000000..40207090b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+#include "loader/KernelBuilder.h"
+
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <loco/IR/Algorithm.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename NodeT> Shape getNodeShape(const NodeT *node)
+{
+ Shape shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ shape.dim(i) = node->dim(i).value();
+ }
+ return shape;
+}
+
+template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
+{
+ const size_t element_size = getDataTypeSize(DT);
+ const int32_t num_elements = node->size<DT>();
+
+ *data_size = num_elements * element_size;
+ if (*data_size > 0)
+ {
+ // FIXME There is no good way to get the pointer to the data currently.
+ return &node->at<DT>(0);
+ }
+ return nullptr;
+}
+
+const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
+{
+ switch (node->dtype())
+ {
+ case DataType::U8:
+ return getNodeDataImpl<DataType::U8>(node, data_size);
+ case DataType::FLOAT32:
+ return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S8:
+ return getNodeDataImpl<DataType::S8>(node, data_size);
+ case DataType::S16:
+ return getNodeDataImpl<DataType::S16>(node, data_size);
+ case DataType::S32:
+ return getNodeDataImpl<DataType::S32>(node, data_size);
+ case DataType::S64:
+ return getNodeDataImpl<DataType::S64>(node, data_size);
+ case DataType::BOOL:
+ return getNodeDataImpl<DataType::BOOL>(node, data_size);
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+ if (node->custom_code() != "CircleReferencingConst")
+ return nullptr;
+
+ // helper struct which describes data loaded to custom_options of CircleReferencingConst node
+ // TODO move this struct to header
+ struct ConstDataReference
+ {
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+ };
+
+ const auto &custom_options = node->custom_options();
+ const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+ *data_size = const_data_ref.size;
+ return const_data_ref.data;
+}
+
+bool isExecutableNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // These nodes denote inputs / outputs of a graph.
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
+ // The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ return false;
+ // Custom nodes may be executable and non-executable
+ case luci::CircleOpcode::CUSTOM:
+ {
+ auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more non-executable Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return false;
+
+ return true;
+ }
+ default:
+ return true;
+ }
+}
+
+bool isTensorProducingNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // Output nodes do not produce tensors.
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
+ // are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
+ case luci::CircleOpcode::UNPACK:
+ case luci::CircleOpcode::WHILE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+ const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return true;
+
+ return false;
+}
+
+} // namespace
+
+GraphLoader::GraphLoader(
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
+ _memory_manager(memory_manager)
+{
+}
+
+void GraphLoader::loadTensors()
+{
+ for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+
+ if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+ throw std::runtime_error("Unsupported Custom operator. " + node->name());
+
+ if (!isTensorProducingNode(node))
+ continue;
+
+ // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+ // be inferred.
+ Shape shape{};
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ shape = getNodeShape(node);
+ break;
+ default:
+ break;
+ }
+
+ AffineQuantization quantization;
+ if (node->quantparam() != nullptr)
+ {
+ const luci::CircleQuantParam *params = node->quantparam();
+ assert(params->scale.size() == params->zerop.size());
+ quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
+ quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
+ quantization.quantized_dimension = params->quantized_dimension;
+ }
+
+ auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
+ node->name());
+
+ // If node has execution plan then read memory offsets for nodes
+ // from the beginning of shared memory buffer. Used in Static Memory Manager.
+ if (luci::has_execution_plan(node))
+ {
+ auto execution_plan = luci::get_execution_plan(node);
+ assert(!execution_plan.offsets().empty());
+ tensor->set_offset(execution_plan.offsets().front());
+ }
+
+ if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(const_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+ {
+ const auto *custom_node =
+ loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(custom_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ }
+
+ _node_to_tensor.emplace(node, tensor.get());
+ _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
+
+ _runtime_graph->addTensor(std::move(tensor));
+ }
+}
+
+void GraphLoader::initInputOutputTensors() const
+{
+ auto input_nodes = loco::input_nodes(_graph);
+ std::vector<Tensor *> input_tensors(input_nodes.size());
+ for (size_t i = 0; i < input_nodes.size(); ++i)
+ {
+ input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+ _memory_manager->allocate_memory(*input_tensors[i]);
+ }
+ _runtime_graph->setInputTensors(input_tensors);
+
+ auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+ std::vector<Tensor *> output_tensors(output_nodes.size());
+ for (size_t i = 0; i < output_nodes.size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ output_tensors[i] = _node_to_tensor.at(node->from());
+ }
+ _runtime_graph->setOutputTensors(output_tensors);
+}
+
+void GraphLoader::loadOperators()
+{
+ KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
+
+ // Create kernels for executable nodes. This has to be done in execution order.
+ auto graph = const_cast<loco::Graph *>(_graph);
+
+ auto const graph_nodes = loco::all_nodes(graph);
+
+ // Checking for execution plan in node annotations.
+ bool has_execution_annotation = true;
+ auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (!luci::has_execution_plan(circle_node))
+ has_execution_annotation = false;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
+
+ if (has_execution_annotation)
+ {
+ // Build ordered_nodes vector that stores the order of execution of graph nodes.
+ std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
+
+ auto const filler = [&ordered_nodes](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ auto const position = luci::get_execution_plan(circle_node).order_in_plan();
+ ordered_nodes.at(position) = circle_node;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), filler);
+
+ for (auto node : ordered_nodes)
+ {
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+ else
+ {
+ // If it is impossible to build the execution order plan,
+ // then we use the default postorder_traversal approach.
+ for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
new file mode 100644
index 000000000..fe066ecf8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <loco/IR/Graph.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class GraphLoader
+{
+public:
+ GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
+
+ void loadTensors();
+ void initInputOutputTensors() const;
+ void loadOperators();
+
+private:
+ const loco::Graph *_graph;
+ RuntimeGraph *_runtime_graph;
+ RuntimeToIR &_runtime_to_ir;
+ IMemoryManager *_memory_manager;
+
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
new file mode 100644
index 000000000..8483a9a3d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilder.h"
+#include "loader/nodes/Builders.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
+#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
+
+// This enum is auxiliary.
+// It is a duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE,
+// because the list of target operators is given as CLASS names
+enum class BuilderId
+{
+#include <luci/IR/CircleNodes.lst>
+  Size // equals the number of values in the BuilderId enum
+};
+
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+/**
+ * @brief Registry of kernel builders
+ *
+ * This class contains mapping from Opcodes to kernel builder functions
+ */
+
+class KernelBuilderRegistry
+{
+public:
+ using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
+ KernelBuilderHelper &);
+
+ KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
+ {
+#define REGISTER_KERNEL(name) \
+ register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+ }
+
+ KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
+ {
+ return _operator_builders.at(size_t(opcode));
+ }
+
+private:
+ std::vector<KernelBuilderFunc *> _operator_builders;
+
+ void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
+ {
+    // Since BuilderId is a duplicate of luci::CircleOpcode,
+    // size_t(id) is equal to size_t(the corresponding operation's opcode).
+ assert(size_t(id) < _operator_builders.size());
+ _operator_builders[size_t(id)] = func;
+ }
+};
+
+KernelBuilder::KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+{
+ _builder_registry = std::make_unique<KernelBuilderRegistry>();
+}
+
+KernelBuilder::~KernelBuilder()
+{
+ // Need to define in this CPP to hide KernelBuilderRegistry internals.
+ // This destructor deletes _builder_registry
+}
+
+std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
+{
+ auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
+ if (specific_builder != nullptr)
+ return specific_builder(node, *this);
+
+ std::string msg = "Unsupported operator: ";
+ msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
+ throw std::invalid_argument(msg.c_str());
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
new file mode 100644
index 000000000..b1f383394
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderRegistry;
+
+class KernelBuilder : public KernelBuilderHelper
+{
+public:
+ KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+ ~KernelBuilder();
+
+ std::unique_ptr<Kernel> build(const luci::CircleNode *node);
+
+private:
+ std::unique_ptr<KernelBuilderRegistry> _builder_registry;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
new file mode 100644
index 000000000..b221b6921
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -0,0 +1,1376 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+#include "loader/KernelBuilder.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+#include <kernels/Add.h>
+#include <kernels/ArgMax.h>
+#include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
+#include <kernels/Cast.h>
+#include <kernels/Concatenation.h>
+#include <kernels/Conv2D.h>
+#include <kernels/DepthToSpace.h>
+#include <kernels/DepthwiseConv2D.h>
+#include <kernels/Div.h>
+#include <kernels/Elu.h>
+#include <kernels/Exp.h>
+#include <kernels/Floor.h>
+#include <kernels/FloorDiv.h>
+#include <kernels/Equal.h>
+#include <kernels/FullyConnected.h>
+#include <kernels/Greater.h>
+#include <kernels/GreaterEqual.h>
+#include <kernels/InstanceNorm.h>
+#include <kernels/L2Normalize.h>
+#include <kernels/L2Pool2D.h>
+#include <kernels/LeakyRelu.h>
+#include <kernels/Less.h>
+#include <kernels/LessEqual.h>
+#include <kernels/LocalResponseNormalization.h>
+#include <kernels/LogicalAnd.h>
+#include <kernels/LogicalNot.h>
+#include <kernels/LogicalOr.h>
+#include <kernels/Logistic.h>
+#include <kernels/LogSoftmax.h>
+#include <kernels/Maximum.h>
+#include <kernels/MaxPool2D.h>
+#include <kernels/Mean.h>
+#include <kernels/Minimum.h>
+#include <kernels/Mul.h>
+#include <kernels/Neg.h>
+#include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
+#include <kernels/Pad.h>
+#include <kernels/PadV2.h>
+#include <kernels/Pow.h>
+#include <kernels/PRelu.h>
+#include <kernels/Relu.h>
+#include <kernels/Relu6.h>
+#include <kernels/Reshape.h>
+#include <kernels/ResizeBilinear.h>
+#include <kernels/ResizeNearestNeighbor.h>
+#include <kernels/ReverseV2.h>
+#include <kernels/Rsqrt.h>
+#include <kernels/Slice.h>
+#include <kernels/Softmax.h>
+#include <kernels/SpaceToDepth.h>
+#include <kernels/Split.h>
+#include <kernels/SplitV.h>
+#include <kernels/Sqrt.h>
+#include <kernels/SquaredDifference.h>
+#include <kernels/Squeeze.h>
+#include <kernels/StridedSlice.h>
+#include <kernels/Sub.h>
+#include <kernels/Tanh.h>
+#include <kernels/Transpose.h>
+#include <kernels/TransposeConv.h>
+#include <kernels/Unpack.h>
+
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class KernelBuilderTest : public Test
+{
+protected:
+ luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+ void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+
+ template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
+ {
+ auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
+ // The actual type does not matter for the purpose of the tests.
+ // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
+ // actual output types).
+ node->dtype(loco::DataType::FLOAT32);
+ return node;
+ }
+
+ template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
+ {
+ auto *node_out = createNode<NodeOutT>();
+ node_out->input(node);
+ node_out->index(index);
+ return node_out;
+ }
+
+ template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
+ {
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
+
+ RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
+ graph_to_runtime_graph[&_graph] = &runtime_graph;
+ RuntimeToIR runtime_to_ir;
+ GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
+ _node_to_tensor, _memory_manager.get());
+ graph_loader.loadTensors();
+
+ KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
+
+ auto kernel = kernel_builder.build(op);
+ return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
+ }
+
+ void checkTensor(const Tensor *tensor, const loco::Node *node)
+ {
+ EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
+ }
+
+private:
+ loco::Graph _graph;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+};
+
+TEST_F(KernelBuilderTest, Add)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleAdd>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Add>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, ArgMax)
+{
+ auto *input = createInputNode();
+ auto *axis = createInputNode();
+
+ auto *op = createNode<luci::CircleArgMax>();
+ op->input(input);
+ op->dimension(axis);
+
+ op->output_type(loco::DataType::FLOAT32);
+
+ auto kernel = buildKernel<kernels::ArgMax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().output_type, Eq(op->output_type()));
+}
+
+TEST_F(KernelBuilderTest, AveragePool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleAveragePool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::AveragePool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+ auto *lhs = createInputNode();
+ auto *rhs = createInputNode();
+
+ auto *op = createNode<luci::CircleBatchMatMul>();
+ op->x(lhs);
+ op->y(rhs);
+ op->adj_x(false);
+ op->adj_y(false);
+
+ auto kernel = buildKernel<kernels::BatchMatMul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), lhs);
+ checkTensor(kernel->y(), rhs);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+ EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
+TEST_F(KernelBuilderTest, Cast)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleCast>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Cast>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Concatenation)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleConcatenation>(2);
+ op->values(0, input1);
+ op->values(1, input2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Concatenation>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(0), input1);
+ checkTensor(kernel->input(1), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Conv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+ op->dilation()->h(17);
+ op->dilation()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Conv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, DepthToSpace)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthToSpace>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::DepthToSpace>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, DepthwiseConv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthwiseConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->depthMultiplier(11);
+ op->stride()->h(13);
+ op->stride()->w(17);
+ op->dilation()->h(19);
+ op->dilation()->w(23);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::DepthwiseConv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Div)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleDiv>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Div>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Elu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleElu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Elu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Exp)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleExp>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Exp>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Floor)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleFloor>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Floor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FloorDiv)
+{
+ auto *x = createInputNode();
+ auto *y = createInputNode();
+
+ auto *op = createNode<luci::CircleFloorDiv>();
+ op->x(x);
+ op->y(y);
+
+ auto kernel = buildKernel<kernels::FloorDiv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x);
+ checkTensor(kernel->y(), y);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Equal)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Equal>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FullyConnected)
+{
+ auto *input = createInputNode();
+ auto *weights = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleFullyConnected>();
+ op->input(input);
+ op->weights(weights);
+ op->bias(bias);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::FullyConnected>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->weights(), weights);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Greater)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreater>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Greater>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, GreaterEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreaterEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::GreaterEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, InstanceNorm)
+{
+ auto *input = createInputNode();
+ auto *gamma = createInputNode();
+ auto *beta = createInputNode();
+
+ auto *op = createNode<luci::CircleInstanceNorm>();
+ op->input(input);
+ op->gamma(gamma);
+ op->beta(beta);
+
+ op->epsilon(1e-05);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::InstanceNorm>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->gamma(), gamma);
+ checkTensor(kernel->beta(), beta);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Normalize)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Normalize>();
+ op->x(input);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Normalize>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Pool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Pool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Pool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, LeakyRelu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLeakyRelu>();
+ op->features(input);
+
+ op->alpha(11.0f);
+
+ auto kernel = buildKernel<kernels::LeakyRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+}
+
+TEST_F(KernelBuilderTest, Less)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLess>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Less>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LessEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLessEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::LessEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LocalResponseNormalization)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLocalResponseNormalization>();
+ op->input(input);
+
+ op->radius(11);
+ op->bias(13.0f);
+ op->alpha(15.0f);
+ op->beta(17.0f);
+
+ auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
+ EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, LogicalAnd)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalAnd>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalAnd>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalNot)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalNot>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::LogicalNot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalOr)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalOr>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalOr>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Logistic)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogistic>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Logistic>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogSoftmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogSoftmax>();
+ op->logits(input);
+
+ auto kernel = buildKernel<kernels::LogSoftmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Maximum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMaximum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Maximum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, MaxPool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleMaxPool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::MaxPool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Mean)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleMean>();
+ op->input(input);
+ op->reduction_indices(axes);
+
+ op->keep_dims(true);
+
+ auto kernel = buildKernel<kernels::Mean>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
+}
+
+TEST_F(KernelBuilderTest, Minimum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMinimum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Minimum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Mul)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMul>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Mul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Neg)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleNeg>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Neg>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, NotEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleNotEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::NotEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, OneHot)
+{
+ auto *indices = createInputNode();
+ auto *depth = createInputNode();
+ auto *on_value = createInputNode();
+ auto *off_value = createInputNode();
+ auto axis = 1;
+
+ auto *op = createNode<luci::CircleOneHot>();
+ op->indices(indices);
+ op->depth(depth);
+ op->on_value(on_value);
+ op->off_value(off_value);
+ op->axis(axis);
+
+ auto kernel = buildKernel<kernels::OneHot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->indices(), indices);
+ checkTensor(kernel->depth(), depth);
+ checkTensor(kernel->on_value(), on_value);
+ checkTensor(kernel->off_value(), off_value);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, Pad)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+
+ auto *op = createNode<luci::CirclePad>();
+ op->input(input);
+ op->paddings(paddings);
+
+ auto kernel = buildKernel<kernels::Pad>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PadV2)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+ auto *constant_values = createInputNode();
+
+ auto *op = createNode<luci::CirclePadV2>();
+ op->input(input);
+ op->paddings(paddings);
+ op->constant_values(constant_values);
+
+ auto kernel = buildKernel<kernels::PadV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->constant_values(), constant_values);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Pow)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CirclePow>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Pow>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PRelu)
+{
+ auto *input = createInputNode();
+ auto *alpha = createInputNode();
+
+ auto *op = createNode<luci::CirclePRelu>();
+ op->input(input);
+ op->alpha(alpha);
+
+ auto kernel = buildKernel<kernels::PRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->alpha(), alpha);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu6)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu6>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu6>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Reshape)
+{
+ auto *input = createInputNode();
+ auto *shape = createInputNode();
+
+ auto *op = createNode<luci::CircleReshape>();
+ op->tensor(input);
+ op->shape(shape);
+
+ auto kernel = buildKernel<kernels::Reshape>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->shape(), shape);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, ResizeBilinear)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeBilinear>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+ op->half_pixel_centers(true);
+
+ auto kernel = buildKernel<kernels::ResizeBilinear>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+ EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers()));
+}
+
+TEST_F(KernelBuilderTest, ResizeNearestNeighbor)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeNearestNeighbor>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+
+ auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+  // TODO CircleResizeNearestNeighbor does not yet expose half_pixel_centers;
+  // once it is added, also check kernel->params().half_pixel_centers here.
+}
+
+TEST_F(KernelBuilderTest, ReverseV2)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleReverseV2>();
+ op->tensor(input);
+ op->axis(axes);
+
+ auto kernel = buildKernel<kernels::ReverseV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Rsqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRsqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Rsqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Slice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->size(size);
+
+ auto kernel = buildKernel<kernels::Slice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Softmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSoftmax>();
+ op->logits(input);
+
+ op->beta(11.0f);
+
+ auto kernel = buildKernel<kernels::Softmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, SpaceToDepth)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSpaceToDepth>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::SpaceToDepth>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+  EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, Split)
+{
+ auto *axis = createInputNode();
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleSplit>();
+ auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
+
+ op->split_dim(axis);
+ op->input(input);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::Split>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+}
+
+TEST_F(KernelBuilderTest, SplitV)
+{
+ auto *input = createInputNode();
+ auto *size_splits = createInputNode();
+ auto *axis = createInputNode();
+ auto *op = createNode<luci::CircleSplitV>();
+ auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
+ auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
+
+ op->input(input);
+ op->size_splits(size_splits);
+ op->split_dim(axis);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::SplitV>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size_splits(), size_splits);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(0), output0);
+ checkTensor(kernel->output(1), output1);
+}
+
+TEST_F(KernelBuilderTest, Sqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Sqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, SquaredDifference)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSquaredDifference>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::SquaredDifference>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Squeeze)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqueeze>();
+ op->input(input);
+
+ op->squeeze_dims({11, 13});
+
+ auto kernel = buildKernel<kernels::Squeeze>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
+}
+
+TEST_F(KernelBuilderTest, StridedSlice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *end = createInputNode();
+ auto *strides = createInputNode();
+
+ auto *op = createNode<luci::CircleStridedSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->end(end);
+ op->strides(strides);
+
+ op->begin_mask(11);
+ op->ellipsis_mask(13);
+ op->end_mask(17);
+ op->new_axis_mask(19);
+ op->shrink_axis_mask(23);
+
+ auto kernel = buildKernel<kernels::StridedSlice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->end(), end);
+ checkTensor(kernel->strides(), strides);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
+ EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
+ EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
+ EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
+ EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
+}
+
+TEST_F(KernelBuilderTest, Sub)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSub>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Sub>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Tanh)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTanh>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Tanh>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Transpose)
+{
+ auto *input = createInputNode();
+ auto *perm = createInputNode();
+
+ auto *op = createNode<luci::CircleTranspose>();
+ op->a(input);
+ op->perm(perm);
+
+ auto kernel = buildKernel<kernels::Transpose>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->perm(), perm);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, TransposeConv)
+{
+ auto *output_shape = createInputNode();
+ auto *filter = createInputNode();
+ auto *input = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleTransposeConv>();
+ op->inputSizes(output_shape);
+ op->filter(filter);
+ op->outBackprop(input);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+
+ auto kernel = buildKernel<kernels::TransposeConv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->output_shape(), output_shape);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ checkTensor(kernel->bias(), bias);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+}
+
+TEST_F(KernelBuilderTest, Unpack)
+{
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleUnpack>();
+ auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
+
+ op->value(input);
+
+ op->num(2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Unpack>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, NonExisting1_NEG)
+{
+ auto *op = createNode<luci::CircleConst>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting2_NEG)
+{
+ auto *op = createNode<luci::CircleInput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting3_NEG)
+{
+ auto *op = createNode<luci::CircleOutput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
new file mode 100644
index 000000000..23c96a6db
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilderHelper.h"
+
+#include <luci/IR/Nodes/CircleOutput.h>
+
+namespace luci_interpreter
+{
+
+const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const
+{
+ const Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const
+{
+ if (dynamic_cast<const luci::CircleOutputExclude *>(node))
+ {
+ return nullptr;
+ }
+ return getInputTensor(node);
+}
+
+Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const
+{
+ Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+std::vector<Tensor *>
+KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
+{
+ std::vector<Tensor *> tensors;
+ tensors.reserve(nodes.size());
+ for (const loco::Node *node : nodes)
+ tensors.push_back(getOutputTensor(node));
+ return tensors;
+}
+
+RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const
+{
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ assert(runtime_graph != nullptr);
+ return runtime_graph;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
new file mode 100644
index 000000000..d6fb253b1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/Node.h>
+
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderHelper
+{
+public:
+ KernelBuilderHelper(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ {
+ }
+
+public:
+ const Tensor *getInputTensor(const loco::Node *node) const;
+ const Tensor *getOptionalInputTensor(const loco::Node *node) const;
+
+ Tensor *getOutputTensor(const loco::Node *node) const;
+ std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
+
+ RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
+
+public:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
+ {
+ return _graph_to_runtime_graph;
+ }
+
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const
+ {
+ return _node_to_tensor;
+ }
+
+private:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+template <typename CircleNodeOut>
+std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
+{
+ std::vector<const CircleNodeOut *> output_nodes;
+ for (const loco::Node *loco_node : loco::succs(node))
+ {
+ output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+ }
+ std::sort(output_nodes.begin(), output_nodes.end(),
+ [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+ return node1->index() < node2->index();
+ });
+ return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
new file mode 100644
index 000000000..2f278b087
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager)
+ : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
+ _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
+{
+}
+
+void ModuleLoader::load()
+{
+ // Runtime graphs have to be created in advance, because they will be needed during the loading
+ // process for control flow nodes.
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
+ }
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ const loco::Graph *graph = _module->graph(i);
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
+ _node_to_tensor, _memory_manager);
+ loader.loadTensors();
+ loader.initInputOutputTensors();
+ loader.loadOperators();
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
new file mode 100644
index 000000000..11326a2ee
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <luci/IR/Module.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader
+{
+public:
+ ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
+
+ void load();
+
+private:
+  const luci::Module *_module;
+  RuntimeModule *_runtime_module;
+  RuntimeToIR &_runtime_to_ir;
+  std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+  IMemoryManager *_memory_manager;
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
new file mode 100644
index 000000000..9ea8b1fa2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+// Maps runtime entities back to IR entities. It is used to implement observing functionality.
+struct RuntimeToIR
+{
+ std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node;
+ std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644
index 000000000..501e84752
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ AddParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644
index 000000000..f3ca55744
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->dimension());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ArgMaxParams params{};
+ params.output_type = node->output_type();
+
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644
index 000000000..a8135706f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current CircleAveragePool2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644
index 000000000..9da2f6d93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *lhs = helper.getInputTensor(node->x());
+ const Tensor *rhs = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto lhs_scratchpad =
+ std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
+ lhs_scratchpad->set_observable(false);
+ lhs_scratchpad->set_data_buffer(nullptr);
+ auto rhs_scratchpad =
+ std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
+ rhs_scratchpad->set_observable(false);
+ rhs_scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current BatchMatMul temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ assert(execution_plan.offsets().size() == 3);
+
+ // If this is true, then we keep this offset in scratchpad.
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
+ rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
+ }
+ }
+ Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
+ Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
+
+ BatchMatMulParams params;
+ params.adj_x = node->adj_x();
+ params.adj_y = node->adj_y();
+
+ return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644
index 000000000..ac6ebb30f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *crops = helper.getInputTensor(node->crops());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
new file mode 100644
index 000000000..eab284008
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "luci/IR/CircleNodes.h"
+
+namespace luci_interpreter
+{
+
+#define REGISTER_KERNEL(name) \
+ std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
+ KernelBuilderHelper &helper);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644
index 000000000..a16354c96
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
+
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644
index 000000000..ba2564ea2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
+ std::vector<const Tensor *> inputs(node->numValues());
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ ConcatenationParams params{};
+ params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644
index 000000000..218165e20
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ Conv2DParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644
index 000000000..174946367
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..8af1e3b58
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthwiseConv2DParams params{};
+ params.padding = node->padding();
+ params.depth_multiplier = node->depthMultiplier();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644
index 000000000..787322e9b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644
index 000000000..0611dfdab
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DivParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644
index 000000000..a79985e3b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Elu>(input, output);
+}
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644
index 000000000..59692883f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+
+{
+ const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644
index 000000000..30d11cb89
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644
index 000000000..9840c34e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644
index 000000000..3aefdf1c5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
+ assert(node->arity() == 2);
+
+ const auto dims = helper.getInputTensor(node->dims());
+ const auto value = helper.getInputTensor(node->value());
+ auto output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644
index 000000000..e0a223116
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644
index 000000000..a45d89e38
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644
index 000000000..b7b742b8a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *weights = helper.getInputTensor(node->weights());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ FullyConnectedParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.keep_num_dims = node->keep_num_dims();
+
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644
index 000000000..2ee2906e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *params = helper.getInputTensor(node->params());
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ GatherParams gparams{};
+ gparams.axis = node->axis();
+ // TODO support batch_dims
+ gparams.batch_dims = 0;
+
+ return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644
index 000000000..80aa63cf0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644
index 000000000..272f2843b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644
index 000000000..3ac7d4941
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+ assert(node->arity() == 1 + node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ const Tensor *cond = helper.getInputTensor(node->cond());
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
+ RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
+
+ return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+ else_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644
index 000000000..06031e5bc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *gamma = helper.getInputTensor(node->gamma());
+ const Tensor *beta = helper.getInputTensor(node->beta());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ InstanceNormParams params{};
+ params.epsilon = node->epsilon();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644
index 000000000..6e22e6d4e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ L2NormParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644
index 000000000..95b55896f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644
index 000000000..bbf5067b1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LeakyReluParams params{};
+ params.alpha = node->alpha();
+
+ return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644
index 000000000..ae914ecc9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644
index 000000000..f1b424b55
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..962ca2d7c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = node->radius();
+ params.bias = node->bias();
+ params.alpha = node->alpha();
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644
index 000000000..432204115
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644
index 000000000..bf3cb671a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644
index 000000000..fefcd9a06
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644
index 000000000..a416cb401
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644
index 000000000..4a69deef1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644
index 000000000..f66a206ca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644
index 000000000..d0bff776a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644
index 000000000..0dec63e79
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ auto temp_sum_unique =
+ std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ temp_sum_unique->set_observable(false);
+ temp_sum_unique->set_data_buffer(nullptr);
+ Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644
index 000000000..1a49c1090
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644
index 000000000..b221b4574
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MirrorPadParams params{};
+ params.mode = node->mode();
+
+ return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644
index 000000000..f9984853a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MulParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644
index 000000000..9a9ecf991
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644
index 000000000..3916a5854
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644
index 000000000..a40160945
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ const Tensor *depth = helper.getInputTensor(node->depth());
+ const Tensor *on_value = helper.getInputTensor(node->on_value());
+ const Tensor *off_value = helper.getInputTensor(node->off_value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ OneHotParams params{};
+ params.axis = node->axis();
+
+ return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644
index 000000000..f3d700c95
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *alpha = helper.getInputTensor(node->alpha());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644
index 000000000..efc5850e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
+ assert(node->arity() == node->values_count());
+
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644
index 000000000..67ce997a7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644
index 000000000..e378a972a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ const Tensor *constant_values = helper.getInputTensor(node->constant_values());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644
index 000000000..d32fc3dbb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644
index 000000000..cb36fb6da
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644
index 000000000..1d64c1c4e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644
index 000000000..e50cd2545
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644
index 000000000..76ddd88a3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *shape = helper.getInputTensor(node->shape());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // NOTE 'newShape' attribute is ignored.
+ return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644
index 000000000..dc2b88ad3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeBilinearParams params{};
+ params.align_corners = node->align_corners();
+ params.half_pixel_centers = node->half_pixel_centers();
+
+ return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..c7058ae78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = node->align_corners();
+  // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
+  // The current CircleResizeNearestNeighbor does not have a half_pixel_centers
+  // attribute, so its value defaults to false here. This needs to be revisited
+  // when CircleResizeNearestNeighbor is updated.
+ params.half_pixel_centers = false;
+
+ return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644
index 000000000..c1a7f5350
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *axes = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ReverseV2>(input, axes, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644
index 000000000..0714a5dba
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644
index 000000000..d172ef438
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ assert(node->arity() == 5);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *feature = helper.getInputTensor(node->weight_feature());
+ const Tensor *time = helper.getInputTensor(node->weight_time());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
+ Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ if (data_type == DataType::FLOAT32 &&
+ (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+ {
+ data_type = feature->element_type();
+ }
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ data_type = DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ SVDFParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.svdf_rank = node->svdf_rank();
+ params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+ return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+ tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644
index 000000000..d1edbc794
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
+ assert(node->arity() == 1);
+
+ const auto input = helper.getInputTensor(node->input());
+ auto output = helper.getOutputTensor(node);
+
+ ShapeParams shape_params{};
+ shape_params.out_type = node->out_type();
+
+ return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644
index 000000000..60ac6417c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *size = helper.getInputTensor(node->size());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644
index 000000000..f41f63f6f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SoftmaxParams params{};
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644
index 000000000..b6e6cf516
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644
index 000000000..63fdb95ec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ SpaceToDepthParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644
index 000000000..3f6d4a7df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+ assert(node->arity() == 2);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ const Tensor *input = helper.getInputTensor(node->input());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644
index 000000000..0788822ca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
+ assert(node->arity() == 3);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644
index 000000000..b9843fe0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644
index 000000000..0ad7c1772
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644
index 000000000..e4c6fd851
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644
index 000000000..6885f8077
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644
index 000000000..359b4e3e9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *end = helper.getInputTensor(node->end());
+ const Tensor *strides = helper.getInputTensor(node->strides());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ StridedSliceParams params{};
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644
index 000000000..a6252cb53
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644
index 000000000..a58ef60a8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644
index 000000000..ea17d8311
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->a());
+ const Tensor *perm = helper.getInputTensor(node->perm());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644
index 000000000..d773e301e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ DataType scratch_data_type =
+ helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+ auto scratch_tensor =
+ std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
+ scratch_tensor->set_observable(false);
+ scratch_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
+
+ TransposeConvParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+ tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644
index 000000000..a1c0d323a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+ assert(node->arity() == 1);
+ assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ UnpackParams params{};
+ params.axis = node->axis();
+
+ // NOTE 'num' attribute is ignored.
+ return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644
index 000000000..8fde6ec8a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
+
+ auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
+ assert(node->arity() == node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
+ RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
+
+ return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
+ body_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt
index 7953359ad..d3048264d 100644
--- a/compiler/luci-micro/standalone/CMakeLists.txt
+++ b/compiler/luci-micro/standalone/CMakeLists.txt
@@ -7,6 +7,9 @@ set(BUILD_WHITELIST "dummy")
add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc)
set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler")
+nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+
+include_directories(${FlatBuffersSource_DIR}/include)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor)
@@ -14,7 +17,21 @@ add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes-std ${CMAKE_CURRENT_BINARY_DIR}/hermes-std)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes ${CMAKE_CURRENT_BINARY_DIR}/hermes)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-strcast ${CMAKE_CURRENT_BINARY_DIR}/pepper-strcast)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/foder ${CMAKE_CURRENT_BINARY_DIR}/foder)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/mio-circle04 ${CMAKE_CURRENT_BINARY_DIR}/mio-circle04)
+
add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/import ${CMAKE_CURRENT_BINARY_DIR}/luci/import)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/profile ${CMAKE_CURRENT_BINARY_DIR}/luci/profile)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/env ${CMAKE_CURRENT_BINARY_DIR}/luci/env)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/plan ${CMAKE_CURRENT_BINARY_DIR}/luci/plan)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/log ${CMAKE_CURRENT_BINARY_DIR}/luci/log)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/logex ${CMAKE_CURRENT_BINARY_DIR}/luci/logex)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/locop ${CMAKE_CURRENT_BINARY_DIR}/locop)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/pp ${CMAKE_CURRENT_BINARY_DIR}/pp)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt
index 034fe5269..3489f1eac 100644
--- a/compiler/luci-pass-value-test/CMakeLists.txt
+++ b/compiler/luci-pass-value-test/CMakeLists.txt
@@ -17,6 +17,13 @@ macro(addeval RECIPE PASS_OPTION)
set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}")
set(DASH_PASS_OPTION "--${PASS_OPTION}")
+ foreach(MORE_OPTIONS ${ARGN})
+ list(APPEND DASH_PASS_OPTION "--${MORE_OPTIONS}")
+ endforeach()
+ # NOTE if there are two options, 'DASH_PASS_OPTION' will be like '--option_a;--option_b'
+ # add_custom_command() will translate ';' to two arguments as '--optiona_a --optionb'
+ # do not use set(DASH_PASS_OPTION "${DASH_PASS_OPTION} --${ARG}"))
+ # as this will become like '"--optiona_a --optionb"' which is one string argument
# Generate optimized .circle
add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH}
diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst
index 67476c644..cdff159e0 100644
--- a/compiler/luci-pass-value-test/test.lst
+++ b/compiler/luci-pass-value-test/test.lst
@@ -14,6 +14,8 @@ addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6)
addeval(Net_Conv_Relu6_000 fuse_activation_function)
+addeval(Net_Densify_Add_000 fold_densify)
+addeval(Net_Dequantize_Add_000 fold_dequantize)
addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
@@ -25,10 +27,17 @@ addeval(Net_TConv_Add_002 fuse_add_with_tconv)
addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv)
addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv)
addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv)
addeval(Net_InstanceNorm_001 fuse_instnorm)
addeval(Net_InstanceNorm_002 fuse_instnorm)
addeval(Net_InstanceNorm_003 fuse_instnorm)
addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
+addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
+
+# test for limited support for FLOAT16
+addeval(Net_Dequantize_Add_000 fold_dequantize)
+addeval(Net_Densify_Dequantize_Add_000 fold_dequantize fold_densify)
# test SignatureDef, with any optimization
#addeval(SignatureDef_MultiOut_000 fuse_instnorm)
diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst
index f62b72919..932da95c5 100644
--- a/compiler/luci-value-test/test.lst
+++ b/compiler/luci-value-test/test.lst
@@ -161,6 +161,8 @@ addeval(Squeeze_001)
addeval(StridedSlice_000)
addeval(StridedSlice_001)
addeval(StridedSlice_002)
+addeval(StridedSlice_003)
+addeval(StridedSlice_004)
addeval(Sub_000)
addeval(Sub_U8_000)
#addeval(Sum_000)
diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
index 0ff21a34b..7516197c0 100644
--- a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
+++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
@@ -118,6 +118,10 @@ public:
return circle::CreateCosOptions(_builder).Union();
}
flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleDensify *)
+ {
+ return circle::CreateDensifyOptions(_builder).Union();
+ }
flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node)
{
return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union();
diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst
index 1b6909303..8a75ef706 100644
--- a/compiler/luci/export/src/CircleOps.lst
+++ b/compiler/luci/export/src/CircleOps.lst
@@ -32,6 +32,7 @@ CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_C
CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions)
CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions)
CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleDensify, BuiltinOperator_DENSIFY, BuiltinOptions_DensifyOptions)
CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions)
CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions)
CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions)
diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp
index b3bb850cc..97e81076b 100644
--- a/compiler/luci/export/src/CircleTensorExporter.cpp
+++ b/compiler/luci/export/src/CircleTensorExporter.cpp
@@ -434,6 +434,12 @@ flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, l
break;
}
+ // NOTE loco::DataType::FLOAT16 is added but we do not export this type
+ // as backends currently don't support this type.
+ // currently this is supported only for "Tensor(Float16) - Dequantize"
+ // sequence so that after 'fold_dequantize' option this Tensor is
+ // converted to FLOAT32.
+
INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype()));
}
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 1b2db23ae..bc0a00b34 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -18,6 +18,7 @@ target_link_libraries(luci_import PRIVATE luci_log)
target_link_libraries(luci_import PRIVATE luci_logex)
target_link_libraries(luci_import PRIVATE nncc_common)
target_link_libraries(luci_import PRIVATE locop)
+target_link_libraries(luci_import PRIVATE foder)
target_link_libraries(luci_import PRIVATE oops)
target_link_libraries(luci_import PRIVATE mio_circle04_helper)
install(TARGETS luci_import DESTINATION lib)
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
index 7a5045ede..a4a6d7ce8 100644
--- a/compiler/luci/import/include/luci/Import/Nodes.h
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -35,6 +35,7 @@
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
+#include "Nodes/CircleDensify.h"
#include "Nodes/CircleDepthToSpace.h"
#include "Nodes/CircleDepthwiseConv2D.h"
#include "Nodes/CircleDequantize.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h
new file mode 100644
index 000000000..42bdac1a4
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
+#define __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleDensifyGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
diff --git a/compiler/luci/import/include/luci/ImporterEx.h b/compiler/luci/import/include/luci/ImporterEx.h
new file mode 100644
index 000000000..852d4c848
--- /dev/null
+++ b/compiler/luci/import/include/luci/ImporterEx.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORTER_EX_H__
+#define __LUCI_IMPORTER_EX_H__
+
+#include "luci/IR/Module.h"
+
+#include <memory>
+#include <string>
+
+namespace luci
+{
+
+class ImporterEx final
+{
+public:
+ ImporterEx() = default;
+
+public:
+ std::unique_ptr<Module> importVerifyModule(const std::string &input_path) const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORTER_EX_H__
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
index fe2d830e9..d3b52aadb 100644
--- a/compiler/luci/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -44,6 +44,7 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(CONCATENATION, CircleConcatenationGraphBuilder); // 2
CIRCLE_NODE(CONV_2D, CircleConv2DGraphBuilder); // 3
CIRCLE_NODE(COS, CircleCosGraphBuilder); // 108
+ CIRCLE_NODE(DENSIFY, CircleDensifyGraphBuilder); // 124
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceGraphBuilder); // 5
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DGraphBuilder); // 4
CIRCLE_NODE(DEQUANTIZE, CircleDequantizeGraphBuilder); // 6
@@ -160,7 +161,6 @@ GraphBuilderRegistry::GraphBuilderRegistry()
// BuiltinOperator_DELEGATE = 51,
// BuiltinOperator_ARG_MAX = 56,
// BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_DENSIFY = 124,
// Register builders for nodes which not handles in builders registered above.
#define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>())
diff --git a/compiler/luci/import/src/ImporterEx.cpp b/compiler/luci/import/src/ImporterEx.cpp
new file mode 100644
index 000000000..db585fd4d
--- /dev/null
+++ b/compiler/luci/import/src/ImporterEx.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Importer.h"
+#include "luci/ImporterEx.h"
+
+#include <foder/FileLoader.h>
+
+#include <memory>
+#include <iostream>
+
+namespace luci
+{
+
+std::unique_ptr<Module> ImporterEx::importVerifyModule(const std::string &input_path) const
+{
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data;
+
+ try
+ {
+ model_data = file_loader.load();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return nullptr;
+ }
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ Importer importer;
+ return importer.importModule(circle_model);
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
index a4f190dd9..88f2ae3d0 100644
--- a/compiler/luci/import/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -166,6 +166,10 @@ CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index,
copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node);
break;
+ case loco::DataType::FLOAT16:
+ copy_data<loco::DataType::FLOAT16>(buffer, num_elements, const_node);
+ break;
+
case loco::DataType::U8:
copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
break;
diff --git a/compiler/luci/import/src/Nodes/CircleDensify.cpp b/compiler/luci/import/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..0a4b2186f
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleDensify.h"
+
+#include <luci/IR/Nodes/CircleDensify.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleDensifyGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleDensifyGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleDensify>();
+ node->input(inputs.at(0));
+
+ // No options for Densify
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
index d89ea03cc..901f1cbca 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -32,6 +32,7 @@
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
+#include "Nodes/CircleDensify.h"
#include "Nodes/CircleDepthToSpace.h"
#include "Nodes/CircleDepthwiseConv2D.h"
#include "Nodes/CircleDequantize.h"
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
index 1472008df..f227a03f5 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -30,6 +30,7 @@ CIRCLE_NODE(CONCATENATION, CircleConcatenation)
CIRCLE_NODE(CONV_2D, CircleConv2D)
CIRCLE_NODE(COS, CircleCos)
CIRCLE_NODE(CUSTOM, CircleCustom)
+CIRCLE_NODE(DENSIFY, CircleDensify)
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpace)
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2D)
CIRCLE_NODE(DEQUANTIZE, CircleDequantize)
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h
new file mode 100644
index 000000000..7acad0341
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_DENSIFY_H__
+#define __LUCI_IR_CIRCLE_DENSIFY_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief DENSIFY in Circle
+ */
+class CircleDensify final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DENSIFY>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_DENSIFY_H__
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp
index c2d82c8a2..a4854ec59 100644
--- a/compiler/luci/lang/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp
@@ -77,6 +77,7 @@ INSTANTIATE(loco::DataType::S8);
INSTANTIATE(loco::DataType::FLOAT32);
INSTANTIATE(loco::DataType::U8);
INSTANTIATE(loco::DataType::BOOL);
+INSTANTIATE(loco::DataType::FLOAT16);
#undef INSTANTIATE
diff --git a/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..ae83784a5
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleDensify.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDensifyTest, constructor)
+{
+ luci::CircleDensify densify_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), densify_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::DENSIFY, densify_node.opcode());
+
+ ASSERT_EQ(nullptr, densify_node.input());
+}
+
+TEST(CircleDensifyTest, input_NEG)
+{
+ luci::CircleDensify densify_node;
+ luci::CircleDensify node;
+
+ densify_node.input(&node);
+ ASSERT_NE(nullptr, densify_node.input());
+
+ densify_node.input(nullptr);
+ ASSERT_EQ(nullptr, densify_node.input());
+}
+
+TEST(CircleDensifyTest, arity_NEG)
+{
+ luci::CircleDensify densify_node;
+
+ ASSERT_NO_THROW(densify_node.arg(0));
+ ASSERT_THROW(densify_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleDensifyTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleDensify densify_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(densify_node.accept(&tv), std::exception);
+}
+
+TEST(CircleDensifyTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleDensify densify_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(densify_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
index eff0830b4..8409f250e 100644
--- a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
@@ -137,6 +137,7 @@ CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node)
CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder)
CIRCLE_NODE(COS, CircleCosSummaryBuilder)
CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder)
+ CIRCLE_NODE(DENSIFY, CircleDensifySummaryBuilder)
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder)
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder)
CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder)
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
index 6df9270e3..48e4579ea 100644
--- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
@@ -374,6 +374,22 @@ void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode
s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction()));
}
+void CircleConstSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto circonst = loco::must_cast<const luci::CircleConst *>(node);
+ s.args().append("dtype", to_str(circonst->dtype()));
+ s.args().append("rank", std::to_string(circonst->rank()));
+ std::string shape;
+ for (uint32_t r = 0; r < circonst->rank(); ++r)
+ {
+ if (!shape.empty())
+ shape += " ";
+ shape += std::to_string(circonst->dim(r).value());
+ }
+ s.args().append("shape", "[" + shape + "]");
+}
+
void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s)
{
s.state(locop::NodeDesc::State::PartiallyKnown);
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
index 6cd24b7f1..f0cac4e5e 100644
--- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
@@ -167,6 +167,7 @@ private:
class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder
{
private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
void update_status(locop::NodeSummary &s);
};
@@ -189,6 +190,10 @@ private:
void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
};
+class CircleDensifySummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
{
private:
diff --git a/compiler/luci/partition/include/luci/ConnectNode.h b/compiler/luci/partition/include/luci/ConnectNode.h
new file mode 100644
index 000000000..2d9d41d77
--- /dev/null
+++ b/compiler/luci/partition/include/luci/ConnectNode.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
+#define __LUCI_PARTITION_CONNECT_NODE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @note MapNode2Clone is used as a map from original node to cloned node
+ * to find input of a cloned node
+ *
+ * (Original) (Clone)
+ *
+ * [A] [A']
+ * | [B] | [B']
+ * | | | |
+ * \ / \ /
+ * [C] [C']
+ *
+ * From view of [C'] we need to find [A'] and [B']. We know [C] from [C'],
+ * then we can get from input of [C] as [A], [B] then [A]->[A'] and [B]->[B']
+ * from the map.
+ */
+using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
+
+struct CloneContext
+{
+ std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
+ {
+ return node2clone.emplace(org, clone);
+ }
+ MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
+ MapNode2Clone::iterator end(void) { return node2clone.end(); }
+
+ MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); }
+ MapNode2Clone::const_iterator end(void) const { return node2clone.end(); }
+
+ MapNode2Clone node2clone;
+};
+
+class ConnectNode final : public luci::CircleNodeVisitor<void>
+{
+public:
+ ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext){};
+
+public:
+ void visit(const luci::CircleAbs *) final;
+ void visit(const luci::CircleAdd *) final;
+ void visit(const luci::CircleAddN *) final;
+ void visit(const luci::CircleArgMax *) final;
+ void visit(const luci::CircleArgMin *) final;
+ void visit(const luci::CircleAveragePool2D *) final;
+ void visit(const luci::CircleBatchMatMul *) final;
+ void visit(const luci::CircleBatchToSpaceND *) final;
+ void visit(const luci::CircleCast *) final;
+ void visit(const luci::CircleCeil *) final;
+ void visit(const luci::CircleConcatenation *) final;
+ void visit(const luci::CircleConst *) final;
+ void visit(const luci::CircleConv2D *) final;
+ void visit(const luci::CircleCos *) final;
+ void visit(const luci::CircleCustom *) final;
+ void visit(const luci::CircleDensify *) final;
+ void visit(const luci::CircleDepthToSpace *) final;
+ void visit(const luci::CircleDepthwiseConv2D *) final;
+ void visit(const luci::CircleDequantize *) final;
+ void visit(const luci::CircleDiv *) final;
+ void visit(const luci::CircleElu *) final;
+ void visit(const luci::CircleEqual *) final;
+ void visit(const luci::CircleExp *) final;
+ void visit(const luci::CircleExpandDims *) final;
+ void visit(const luci::CircleFakeQuant *) final;
+ void visit(const luci::CircleFill *) final;
+ void visit(const luci::CircleFloor *) final;
+ void visit(const luci::CircleFloorDiv *) final;
+ void visit(const luci::CircleFloorMod *) final;
+ void visit(const luci::CircleFullyConnected *) final;
+ void visit(const luci::CircleGather *) final;
+ void visit(const luci::CircleGatherNd *) final;
+ void visit(const luci::CircleGreater *) final;
+ void visit(const luci::CircleGreaterEqual *) final;
+ void visit(const luci::CircleIf *) final;
+ void visit(const luci::CircleL2Normalize *) final;
+ void visit(const luci::CircleL2Pool2D *) final;
+ void visit(const luci::CircleLeakyRelu *) final;
+ void visit(const luci::CircleLess *) final;
+ void visit(const luci::CircleLessEqual *) final;
+ void visit(const luci::CircleLocalResponseNormalization *) final;
+ void visit(const luci::CircleLog *) final;
+ void visit(const luci::CircleLogicalAnd *) final;
+ void visit(const luci::CircleLogicalNot *) final;
+ void visit(const luci::CircleLogicalOr *) final;
+ void visit(const luci::CircleLogistic *) final;
+ void visit(const luci::CircleLogSoftmax *) final;
+ void visit(const luci::CircleMatrixDiag *) final;
+ void visit(const luci::CircleMatrixSetDiag *) final;
+ void visit(const luci::CircleMaximum *) final;
+ void visit(const luci::CircleMaxPool2D *) final;
+ void visit(const luci::CircleMean *) final;
+ void visit(const luci::CircleMinimum *) final;
+ void visit(const luci::CircleMirrorPad *) final;
+ void visit(const luci::CircleMul *) final;
+ void visit(const luci::CircleNeg *) final;
+ void visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ void visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ void visit(const luci::CircleNotEqual *) final;
+ void visit(const luci::CircleOneHot *) final;
+ void visit(const luci::CirclePack *) final;
+ void visit(const luci::CirclePad *) final;
+ void visit(const luci::CirclePadV2 *) final;
+ void visit(const luci::CirclePow *) final;
+ void visit(const luci::CirclePRelu *) final;
+ void visit(const luci::CircleQuantize *) final;
+ void visit(const luci::CircleRange *) final;
+ void visit(const luci::CircleRank *) final;
+ void visit(const luci::CircleReduceAny *) final;
+ void visit(const luci::CircleReduceMax *) final;
+ void visit(const luci::CircleReduceMin *) final;
+ void visit(const luci::CircleReduceProd *) final;
+ void visit(const luci::CircleRelu *) final;
+ void visit(const luci::CircleRelu6 *) final;
+ void visit(const luci::CircleReluN1To1 *) final;
+ void visit(const luci::CircleReshape *) final;
+ void visit(const luci::CircleResizeBilinear *) final;
+ void visit(const luci::CircleResizeNearestNeighbor *) final;
+ void visit(const luci::CircleReverseSequence *) final;
+ void visit(const luci::CircleReverseV2 *) final;
+ void visit(const luci::CircleRound *) final;
+ void visit(const luci::CircleRsqrt *) final;
+ void visit(const luci::CircleScatterNd *) final;
+ void visit(const luci::CircleSegmentSum *) final;
+ void visit(const luci::CircleSelect *) final;
+ void visit(const luci::CircleSelectV2 *) final;
+ void visit(const luci::CircleShape *) final;
+ void visit(const luci::CircleSin *) final;
+ void visit(const luci::CircleSlice *) final;
+ void visit(const luci::CircleSoftmax *) final;
+ void visit(const luci::CircleSpaceToBatchND *) final;
+ void visit(const luci::CircleSpaceToDepth *) final;
+ void visit(const luci::CircleSparseToDense *) final;
+ void visit(const luci::CircleSplit *) final;
+ void visit(const luci::CircleSplitV *) final;
+ void visit(const luci::CircleSqrt *) final;
+ void visit(const luci::CircleSquare *) final;
+ void visit(const luci::CircleSquaredDifference *) final;
+ void visit(const luci::CircleSqueeze *) final;
+ void visit(const luci::CircleStridedSlice *) final;
+ void visit(const luci::CircleSVDF *) final;
+ void visit(const luci::CircleSub *) final;
+ void visit(const luci::CircleSum *) final;
+ void visit(const luci::CircleTanh *) final;
+ void visit(const luci::CircleTile *) final;
+ void visit(const luci::CircleTopKV2 *) final;
+ void visit(const luci::CircleTranspose *) final;
+ void visit(const luci::CircleTransposeConv *) final;
+ void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ void visit(const luci::CircleUnique *) final;
+ void visit(const luci::CircleUnpack *) final;
+ void visit(const luci::CircleWhere *) final;
+ void visit(const luci::CircleWhile *) final;
+ void visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ void visit(const luci::CircleBCQFullyConnected *) final;
+ void visit(const luci::CircleBCQGather *) final;
+ void visit(const luci::CircleInstanceNorm *) final;
+
+ // NOTE CircleInput and CircleOutput are not handled here as these need
+ // link with graph I/O
+
+ // Virtual
+ void visit(const luci::CircleCustomOut *) final;
+ void visit(const luci::CircleIfOut *) final;
+ // void visit(const luci::CircleInput *) final;
+ void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // void visit(const luci::CircleOutput *) final;
+ void visit(const luci::CircleOutputDummy *) final;
+ void visit(const luci::CircleOutputExclude *) final;
+ void visit(const luci::CircleSplitOut *) final;
+ void visit(const luci::CircleSplitVOut *) final;
+ void visit(const luci::CircleTopKV2Out *) final;
+ void visit(const luci::CircleUniqueOut *) final;
+ void visit(const luci::CircleUnpackOut *) final;
+ void visit(const luci::CircleVariable *) final;
+ void visit(const luci::CircleWhileOut *) final;
+
+public:
+ luci::CircleNode *find_clone(const luci::CircleNode *node);
+
+protected:
+ luci::CloneContext &_clonecontext;
+};
+
+/**
+ * @brief Connect cloned node from input node
+ */
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CONNECT_NODE_H__
diff --git a/compiler/luci/partition/src/ConnectNode.cpp b/compiler/luci/partition/src/ConnectNode.cpp
index 336be7c57..3d8c211c0 100644
--- a/compiler/luci/partition/src/ConnectNode.cpp
+++ b/compiler/luci/partition/src/ConnectNode.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include <oops/UserExn.h>
diff --git a/compiler/luci/partition/src/ConnectNode.h b/compiler/luci/partition/src/ConnectNode.h
deleted file mode 100644
index e60567c69..000000000
--- a/compiler/luci/partition/src/ConnectNode.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
-#define __LUCI_PARTITION_CONNECT_NODE_H__
-
-#include <luci/IR/CircleNode.h>
-#include <luci/IR/CircleNodeVisitor.h>
-
-namespace luci
-{
-
-/**
- * @note MapNode2Clone is used as a map from original node to cloned node
- * to find input of a cloned node
- *
- * (Original) (Clone)
- *
- * [A] [A']
- * | [B] | [B']
- * | | | |
- * \ / \ /
- * [C] [C']
- *
- * From view of [C'] we need to find [A'] and [B']. We know [C] from [C'],
- * then we can get from input of [C] as [A], [B] then [A]->[A'] and [B]->[B']
- * from the map.
- */
-using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
-
-struct CloneContext
-{
- std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
- {
- return node2clone.emplace(org, clone);
- }
- MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
- MapNode2Clone::iterator end(void) { return node2clone.end(); }
-
- MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); }
- MapNode2Clone::const_iterator end(void) const { return node2clone.end(); }
-
- MapNode2Clone node2clone;
-};
-
-class ConnectNode final : public luci::CircleNodeVisitor<void>
-{
-public:
- ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext){};
-
-public:
- void visit(const luci::CircleAbs *) final;
- void visit(const luci::CircleAdd *) final;
- void visit(const luci::CircleAddN *) final;
- void visit(const luci::CircleArgMax *) final;
- void visit(const luci::CircleArgMin *) final;
- void visit(const luci::CircleAveragePool2D *) final;
- void visit(const luci::CircleBatchMatMul *) final;
- void visit(const luci::CircleBatchToSpaceND *) final;
- void visit(const luci::CircleCast *) final;
- void visit(const luci::CircleCeil *) final;
- void visit(const luci::CircleConcatenation *) final;
- void visit(const luci::CircleConst *) final;
- void visit(const luci::CircleConv2D *) final;
- void visit(const luci::CircleCos *) final;
- void visit(const luci::CircleCustom *) final;
- void visit(const luci::CircleDepthToSpace *) final;
- void visit(const luci::CircleDepthwiseConv2D *) final;
- void visit(const luci::CircleDequantize *) final;
- void visit(const luci::CircleDiv *) final;
- void visit(const luci::CircleElu *) final;
- void visit(const luci::CircleEqual *) final;
- void visit(const luci::CircleExp *) final;
- void visit(const luci::CircleExpandDims *) final;
- void visit(const luci::CircleFakeQuant *) final;
- void visit(const luci::CircleFill *) final;
- void visit(const luci::CircleFloor *) final;
- void visit(const luci::CircleFloorDiv *) final;
- void visit(const luci::CircleFloorMod *) final;
- void visit(const luci::CircleFullyConnected *) final;
- void visit(const luci::CircleGather *) final;
- void visit(const luci::CircleGatherNd *) final;
- void visit(const luci::CircleGreater *) final;
- void visit(const luci::CircleGreaterEqual *) final;
- void visit(const luci::CircleIf *) final;
- void visit(const luci::CircleL2Normalize *) final;
- void visit(const luci::CircleL2Pool2D *) final;
- void visit(const luci::CircleLeakyRelu *) final;
- void visit(const luci::CircleLess *) final;
- void visit(const luci::CircleLessEqual *) final;
- void visit(const luci::CircleLocalResponseNormalization *) final;
- void visit(const luci::CircleLog *) final;
- void visit(const luci::CircleLogicalAnd *) final;
- void visit(const luci::CircleLogicalNot *) final;
- void visit(const luci::CircleLogicalOr *) final;
- void visit(const luci::CircleLogistic *) final;
- void visit(const luci::CircleLogSoftmax *) final;
- void visit(const luci::CircleMatrixDiag *) final;
- void visit(const luci::CircleMatrixSetDiag *) final;
- void visit(const luci::CircleMaximum *) final;
- void visit(const luci::CircleMaxPool2D *) final;
- void visit(const luci::CircleMean *) final;
- void visit(const luci::CircleMinimum *) final;
- void visit(const luci::CircleMirrorPad *) final;
- void visit(const luci::CircleMul *) final;
- void visit(const luci::CircleNeg *) final;
- void visit(const luci::CircleNonMaxSuppressionV4 *) final;
- void visit(const luci::CircleNonMaxSuppressionV5 *) final;
- void visit(const luci::CircleNotEqual *) final;
- void visit(const luci::CircleOneHot *) final;
- void visit(const luci::CirclePack *) final;
- void visit(const luci::CirclePad *) final;
- void visit(const luci::CirclePadV2 *) final;
- void visit(const luci::CirclePow *) final;
- void visit(const luci::CirclePRelu *) final;
- void visit(const luci::CircleQuantize *) final;
- void visit(const luci::CircleRange *) final;
- void visit(const luci::CircleRank *) final;
- void visit(const luci::CircleReduceAny *) final;
- void visit(const luci::CircleReduceMax *) final;
- void visit(const luci::CircleReduceMin *) final;
- void visit(const luci::CircleReduceProd *) final;
- void visit(const luci::CircleRelu *) final;
- void visit(const luci::CircleRelu6 *) final;
- void visit(const luci::CircleReluN1To1 *) final;
- void visit(const luci::CircleReshape *) final;
- void visit(const luci::CircleResizeBilinear *) final;
- void visit(const luci::CircleResizeNearestNeighbor *) final;
- void visit(const luci::CircleReverseSequence *) final;
- void visit(const luci::CircleReverseV2 *) final;
- void visit(const luci::CircleRound *) final;
- void visit(const luci::CircleRsqrt *) final;
- void visit(const luci::CircleScatterNd *) final;
- void visit(const luci::CircleSegmentSum *) final;
- void visit(const luci::CircleSelect *) final;
- void visit(const luci::CircleSelectV2 *) final;
- void visit(const luci::CircleShape *) final;
- void visit(const luci::CircleSin *) final;
- void visit(const luci::CircleSlice *) final;
- void visit(const luci::CircleSoftmax *) final;
- void visit(const luci::CircleSpaceToBatchND *) final;
- void visit(const luci::CircleSpaceToDepth *) final;
- void visit(const luci::CircleSparseToDense *) final;
- void visit(const luci::CircleSplit *) final;
- void visit(const luci::CircleSplitV *) final;
- void visit(const luci::CircleSqrt *) final;
- void visit(const luci::CircleSquare *) final;
- void visit(const luci::CircleSquaredDifference *) final;
- void visit(const luci::CircleSqueeze *) final;
- void visit(const luci::CircleStridedSlice *) final;
- void visit(const luci::CircleSVDF *) final;
- void visit(const luci::CircleSub *) final;
- void visit(const luci::CircleSum *) final;
- void visit(const luci::CircleTanh *) final;
- void visit(const luci::CircleTile *) final;
- void visit(const luci::CircleTopKV2 *) final;
- void visit(const luci::CircleTranspose *) final;
- void visit(const luci::CircleTransposeConv *) final;
- void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
- void visit(const luci::CircleUnique *) final;
- void visit(const luci::CircleUnpack *) final;
- void visit(const luci::CircleWhere *) final;
- void visit(const luci::CircleWhile *) final;
- void visit(const luci::CircleZerosLike *) final;
-
- // Circle Only
- void visit(const luci::CircleBCQFullyConnected *) final;
- void visit(const luci::CircleBCQGather *) final;
- void visit(const luci::CircleInstanceNorm *) final;
-
- // NOTE CircleInput and CircleOutput are not handled here as these need
- // link with graph I/O
-
- // Virtual
- void visit(const luci::CircleCustomOut *) final;
- void visit(const luci::CircleIfOut *) final;
- // void visit(const luci::CircleInput *) final;
- void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
- void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
- // void visit(const luci::CircleOutput *) final;
- void visit(const luci::CircleOutputDummy *) final;
- void visit(const luci::CircleOutputExclude *) final;
- void visit(const luci::CircleSplitOut *) final;
- void visit(const luci::CircleSplitVOut *) final;
- void visit(const luci::CircleTopKV2Out *) final;
- void visit(const luci::CircleUniqueOut *) final;
- void visit(const luci::CircleUnpackOut *) final;
- void visit(const luci::CircleVariable *) final;
- void visit(const luci::CircleWhileOut *) final;
-
-public:
- luci::CircleNode *find_clone(const luci::CircleNode *node);
-
-protected:
- luci::CloneContext &_clonecontext;
-};
-
-/**
- * @brief Connect cloned node from input node
- */
-void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
-
-} // namespace luci
-
-#endif // __LUCI_PARTITION_CONNECT_NODE_H__
diff --git a/compiler/luci/partition/src/ConnectNode.test.h b/compiler/luci/partition/src/ConnectNode.test.h
index ac4878a15..18bb52a20 100644
--- a/compiler/luci/partition/src/ConnectNode.test.h
+++ b/compiler/luci/partition/src/ConnectNode.test.h
@@ -17,7 +17,7 @@
#ifndef __CONNECT_NODE_TEST_H__
#define __CONNECT_NODE_TEST_H__
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include <luci/Service/CircleNodeClone.h>
#include <luci/test/TestIOGraph.h>
diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.cpp
index a3fde4c45..a7fbc37d1 100644
--- a/compiler/luci/partition/src/Nodes/CircleAbs.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAbs.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
index f3e721525..ac805c1af 100644
--- a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.cpp
index d393997e9..0754be626 100644
--- a/compiler/luci/partition/src/Nodes/CircleAdd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAdd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
index e457b83d2..99ae52c54 100644
--- a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.cpp
index 81e5e0949..90aaeee3a 100644
--- a/compiler/luci/partition/src/Nodes/CircleAddN.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAddN.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
index 5d0a7489f..37743d3a3 100644
--- a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
index 1409586d7..99b30d38f 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
index c816fbeb8..77248e07e 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
index 6151aa98a..1bb3d84e7 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
index d150be4d6..ed0cf030c 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
index 547665771..1df86c7be 100644
--- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
index fba2be835..266120b92 100644
--- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
index 5b1dd8543..6d50f0e31 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
index 3d64f4b29..2191f5b0a 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
index 90c4d9ef3..a9e810a27 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
index bbbd3f157..0324d85e0 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
index c3992a64e..5a459e78c 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
index 94336d36a..e6d26a6a1 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
index 2a463afb1..40b8f7052 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
index 544f5e127..e9cb350b8 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCast.cpp b/compiler/luci/partition/src/Nodes/CircleCast.cpp
index f7630cd85..e1301aa06 100644
--- a/compiler/luci/partition/src/Nodes/CircleCast.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCast.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
index 005119060..d7b679aa2 100644
--- a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.cpp
index a0c94033e..e7b5f5a3f 100644
--- a/compiler/luci/partition/src/Nodes/CircleCeil.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCeil.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
index dbd7e5390..cb0364844 100644
--- a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
index fb24d21ca..d895685f0 100644
--- a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
index 4d64b85a2..b5c05e25d 100644
--- a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleConst.cpp b/compiler/luci/partition/src/Nodes/CircleConst.cpp
index 118cd8de2..b88f5ef4e 100644
--- a/compiler/luci/partition/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConst.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
index 46716f0ec..ca9cce18f 100644
--- a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
index 829adec9b..4596d9618 100644
--- a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCos.cpp b/compiler/luci/partition/src/Nodes/CircleCos.cpp
index 9dcf81e83..76b1baac3 100644
--- a/compiler/luci/partition/src/Nodes/CircleCos.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCos.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
index 6c92b93fb..ba806a3f9 100644
--- a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.cpp
index ac16ebe40..cc1604876 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustom.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustom.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
index 9f40b5220..f7fe86674 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
index fee1a1a8c..0d83cffaa 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
index 0a293970e..ddd4e93f2 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..cfb236a5d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDensify *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDensify *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDensify *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..94076a8db
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDensify>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Densify)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Densify_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
index ade266e41..c044b4c42 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
index 997360a9b..1b61a3517 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
index 19d1d5f42..2bd9ab5ca 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
index 681f98bdb..02976a488 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
index 3a520d4e9..ac2642bc1 100644
--- a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
index 7f6006c1d..d3a43d374 100644
--- a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.cpp
index 480338542..8941a4196 100644
--- a/compiler/luci/partition/src/Nodes/CircleDiv.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDiv.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
index 226932337..7900beafc 100644
--- a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleElu.cpp b/compiler/luci/partition/src/Nodes/CircleElu.cpp
index d21cd4c01..b77226574 100644
--- a/compiler/luci/partition/src/Nodes/CircleElu.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleElu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
index 94774cca8..20b205048 100644
--- a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.cpp
index 6a126c0e2..2dc0e759b 100644
--- a/compiler/luci/partition/src/Nodes/CircleEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
index 20b539199..c0d3bd915 100644
--- a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleExp.cpp b/compiler/luci/partition/src/Nodes/CircleExp.cpp
index 95fb1cd67..c1da7908a 100644
--- a/compiler/luci/partition/src/Nodes/CircleExp.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExp.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
index 16d7244ab..286f205bf 100644
--- a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
index 6fccd6310..a6ce6495c 100644
--- a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
index 8a5156509..37af10f52 100644
--- a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
index 4855d80ae..5dfaee1b5 100644
--- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
index 3821d755a..2a2ec0cff 100644
--- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFill.cpp b/compiler/luci/partition/src/Nodes/CircleFill.cpp
index 06fca7b41..32688cd9b 100644
--- a/compiler/luci/partition/src/Nodes/CircleFill.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFill.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
index 97a5a348d..4b3872a80 100644
--- a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.cpp
index 7ad392461..f7409a221 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloor.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloor.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
index 1a964ea21..883d36256 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
index 3b92b00c6..57e435c23 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
index 3d2801566..1eb603c5d 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
index 9f868d0e5..1b942d200 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
index 89a09411b..680bf1680 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
index da273037a..206b47aec 100644
--- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
index fc88204bd..39eea5571 100644
--- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGather.cpp b/compiler/luci/partition/src/Nodes/CircleGather.cpp
index 0ee458394..4f059cbe4 100644
--- a/compiler/luci/partition/src/Nodes/CircleGather.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGather.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
index 7f4e08435..f427e0456 100644
--- a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
index 4be05ca94..6a9c3b47f 100644
--- a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
index d673698e1..0207e917d 100644
--- a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.cpp
index 7bc2a14c9..9f4b18fde 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreater.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreater.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
index 842370d42..61d1f5957 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
index 536a0aed6..76130a843 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
index 76dc770f8..7e4e1ef74 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleIf.cpp b/compiler/luci/partition/src/Nodes/CircleIf.cpp
index 1672a136d..45e4ec48b 100644
--- a/compiler/luci/partition/src/Nodes/CircleIf.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIf.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
index dbd25c822..cbb766221 100644
--- a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
index 969bdd93c..2eb5dda1f 100644
--- a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
index 9207654bc..ec2dde3b2 100644
--- a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
index 386652fb1..f64ffd8b4 100644
--- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
index b932223d0..4363c6c18 100644
--- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
index 61ddba264..df26930ec 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
index 4fc23727a..b114a15f0 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
index 24333d507..1eacddb62 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
index 40328488c..22f99d5ef 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
index 3da1ba287..1702ddeb1 100644
--- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
index 5a0d1dd87..71dc55ea0 100644
--- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLess.cpp b/compiler/luci/partition/src/Nodes/CircleLess.cpp
index aab495fcc..52726f9be 100644
--- a/compiler/luci/partition/src/Nodes/CircleLess.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLess.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
index ab65e5d18..c5d194efe 100644
--- a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
index ec129dbe8..e9a3c412b 100644
--- a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
index 0dd8986b6..29f4ababa 100644
--- a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
index 6b0d1cd12..7a00bf94f 100644
--- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
index e1973387d..5e5723817 100644
--- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLog.cpp b/compiler/luci/partition/src/Nodes/CircleLog.cpp
index c43570fa2..676d22fc0 100644
--- a/compiler/luci/partition/src/Nodes/CircleLog.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLog.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
index 8a43f6f01..0a2b97538 100644
--- a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
index de582c80d..c67b08f0f 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
index 1e60bf54c..b6daeb781 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
index 28e8f42e5..1498d85ec 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
index a1189f06f..0b9513626 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
index e2657824c..f9c077e4e 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
index f6b34596e..88dff3651 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
index 418dc023b..59592e41d 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
index fee3f4779..35f8029c0 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
index 7d788512d..804597bed 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
index c4b3f7fe3..241d84040 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
index e92806aff..297e9f2cc 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
index 03e3c3c3e..472cab8c8 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
index 29bb7fe5f..b327aacad 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
index 5503ea18f..4ff797c43 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
index 75a665aee..dee90e5c0 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
index 16996497a..949e0d724 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
index 2ba6055b4..459917e3e 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
index 370174c37..e6a6d5741 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMean.cpp b/compiler/luci/partition/src/Nodes/CircleMean.cpp
index b634e5838..c704d0054 100644
--- a/compiler/luci/partition/src/Nodes/CircleMean.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMean.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
index 53435d9dc..838d7aea2 100644
--- a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
index cdf757583..8958bf64a 100644
--- a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
index 2fe6b0da6..a6c86a27a 100644
--- a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
index 16a24abf7..91c3cb97a 100644
--- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
index 605a126c9..b837e1012 100644
--- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMul.cpp b/compiler/luci/partition/src/Nodes/CircleMul.cpp
index 2cd2b4038..12e14728c 100644
--- a/compiler/luci/partition/src/Nodes/CircleMul.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMul.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
index 99cf0824d..b316679f8 100644
--- a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.cpp
index 413ad4930..e9dcc45cd 100644
--- a/compiler/luci/partition/src/Nodes/CircleNeg.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNeg.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
index bd74a3665..ab13c9416 100644
--- a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
index 63ff3f021..88d72e12f 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
index 2771aef49..e796a14c3 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
index 80e4704b9..61caa3a4c 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
index 5a0a8da8c..eb04f2688 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
index c1f117724..3b0b755a4 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
index 1f20fbb0f..c9c31b315 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
index 69e3cc8e8..3eed260c2 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
index e001b0b0b..2c5822fe3 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
index c40c2a21a..29a6a43bb 100644
--- a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
index 360940ca7..2983e1b27 100644
--- a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
index d76f49255..d172fb834 100644
--- a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
index 3c555c290..59780e424 100644
--- a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
index a033e80a8..61d7620aa 100644
--- a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
index 106eb405d..36ce35077 100644
--- a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
index b8a2341c8..6a2325715 100644
--- a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
index e5bcedcf6..f2a2e2c7d 100644
--- a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePack.cpp b/compiler/luci/partition/src/Nodes/CirclePack.cpp
index 326881067..d4b49bfa9 100644
--- a/compiler/luci/partition/src/Nodes/CirclePack.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePack.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
index 68c513848..665b137e8 100644
--- a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePad.cpp b/compiler/luci/partition/src/Nodes/CirclePad.cpp
index eb2a89c85..0a1d6f7f9 100644
--- a/compiler/luci/partition/src/Nodes/CirclePad.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePad.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
index 24ea83fa3..72f97d6a4 100644
--- a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
index 001fecbcb..969cc271d 100644
--- a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
index aea8e0cce..9829f6269 100644
--- a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePow.cpp b/compiler/luci/partition/src/Nodes/CirclePow.cpp
index fb180ee69..ce69e7402 100644
--- a/compiler/luci/partition/src/Nodes/CirclePow.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePow.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
index 7a5be4d13..f4e49c023 100644
--- a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
index 340c1da42..903a94e32 100644
--- a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
index 1f348b45c..5ca1a6baa 100644
--- a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRange.cpp b/compiler/luci/partition/src/Nodes/CircleRange.cpp
index f295338d8..fa1a02c71 100644
--- a/compiler/luci/partition/src/Nodes/CircleRange.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRange.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
index 59a95f119..b5b0c8aa8 100644
--- a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRank.cpp b/compiler/luci/partition/src/Nodes/CircleRank.cpp
index f7cce762b..35b4764aa 100644
--- a/compiler/luci/partition/src/Nodes/CircleRank.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRank.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
index 74c520bee..5a0a71a7e 100644
--- a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
index ed762dbc6..262e12ac1 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
index 792f51187..45c292073 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
index 09586ecee..d91c78e41 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
index 8fbaf653e..2ad18f339 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
index 105214d0b..65fca6ab3 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
index c37d6248f..db48f54d7 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
index 2fb4e3e01..daac168b2 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
index cc1ac83ad..f5f69f0ff 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.cpp
index d3617bdbd..63ac31ba9 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
index ccaf5760b..ec4d10f09 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
index fb9ba6f36..c2956c456 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
index 1341b0e06..e9ecbe2e6 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
index 476195b71..1141297da 100644
--- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
index 7dc63c6ef..ae60a97e5 100644
--- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.cpp
index e59670453..49f7c64a7 100644
--- a/compiler/luci/partition/src/Nodes/CircleReshape.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReshape.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
index 73cbbdfcc..198cfa1b6 100644
--- a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
index 0f504015b..41fdedf2a 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
index c2d8b714b..437e448a6 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
index c985b7f51..567db4961 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
index 9cc2e558e..5dc99a385 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
index 225d29ea5..348cdbb78 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
index 408fc0c9c..751910326 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
index d59a7de93..4b8c4a444 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
index d41ad8e66..351c6f2c0 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRound.cpp b/compiler/luci/partition/src/Nodes/CircleRound.cpp
index 9170bcdd9..97d002870 100644
--- a/compiler/luci/partition/src/Nodes/CircleRound.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRound.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
index fad090476..02f335dc3 100644
--- a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
index 03e64aad0..44abd5ef7 100644
--- a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
index d76b96e14..39ae1f8f3 100644
--- a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
index f661a794c..e2b99c49d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
index 5fae5206e..af8cd5549 100644
--- a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
index 62912b791..88a3ecf19 100644
--- a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
index f271f8843..4ce787569 100644
--- a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
index 5fc320a16..6540416c6 100644
--- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
index a6bcff20a..453b7cc01 100644
--- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.cpp
index dbe1dd48f..436e95609 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelect.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelect.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
index 912934b8b..2a38de593 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
index 28072c860..a8b6ab556 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
index e8d128e93..c2ebdbe11 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleShape.cpp b/compiler/luci/partition/src/Nodes/CircleShape.cpp
index f93cf1458..2fb3dcdd8 100644
--- a/compiler/luci/partition/src/Nodes/CircleShape.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleShape.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
index 9b4afdcc2..38033a3bc 100644
--- a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSin.cpp b/compiler/luci/partition/src/Nodes/CircleSin.cpp
index 62c776ef6..0ef605994 100644
--- a/compiler/luci/partition/src/Nodes/CircleSin.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSin.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
index fbee6f662..e141b4530 100644
--- a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.cpp
index 7895d9ece..811d81f9e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSlice.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSlice.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
index 3c666ad6c..0718c7f15 100644
--- a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
index 0a93787e7..6b08f005e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
index b25629863..571ad80ff 100644
--- a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
index b94948bee..dc48b36d6 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
index 279e9b232..0fcf22fd0 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
index bd4523ca8..55d562f3d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
index 207163d08..771c1f372 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
index d1ed18818..cc2f5e915 100644
--- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
index 2257186e8..06b3814ee 100644
--- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.cpp
index d6d62a8ed..5f851f049 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplit.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplit.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
index d8d0953e0..a4242b9ab 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
index 4021f2042..1a447581e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
index 85fe2685b..b7cf6fc7d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
index f13205725..43ebe076f 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
index 3ac1d6c27..877a44759 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
index 2034805cd..4bac6c5dc 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
index 434dfb0ad..b3cf4d939 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
index f737aac8d..fd6d0ec05 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
index fa7f7fe2a..be298835e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.cpp
index 1476a8694..56dd5440d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquare.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquare.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
index bb6a7c33f..a509b31b5 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
index 40dd31706..e47be2c7e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
index 9cfe9eefb..a900f1dc3 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
index bc9fda296..ffe3c911b 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
index 1f0971043..7a6e2bf44 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
index 3bdca8a8a..953b45107 100644
--- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
index 130ff9159..3e950fd25 100644
--- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSub.cpp b/compiler/luci/partition/src/Nodes/CircleSub.cpp
index 8ac294b7b..c5bea087f 100644
--- a/compiler/luci/partition/src/Nodes/CircleSub.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSub.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
index 7c0d83745..ca51865a7 100644
--- a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSum.cpp b/compiler/luci/partition/src/Nodes/CircleSum.cpp
index bef1d4676..e929fd090 100644
--- a/compiler/luci/partition/src/Nodes/CircleSum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
index 1ed65c04f..21f6bbb74 100644
--- a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.cpp
index e6c56ebf7..ef5c2c993 100644
--- a/compiler/luci/partition/src/Nodes/CircleTanh.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTanh.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
index 17cd48731..1e2d0629c 100644
--- a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTile.cpp b/compiler/luci/partition/src/Nodes/CircleTile.cpp
index 0381b4dac..0c217436e 100644
--- a/compiler/luci/partition/src/Nodes/CircleTile.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTile.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
index 79d1ba16c..9449c1fa7 100644
--- a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
index ce8a6f5df..41dfa9c22 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
index f08f3f315..e0c4a3a84 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
index 6ca6e3d29..19f0fa7bf 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
index a5c1c43f7..ba085f6a9 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
index 1cbb54666..cbbdb0090 100644
--- a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
index b3b16307c..847683844 100644
--- a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
index 469cc9a1a..6b6819d59 100644
--- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
index ee9fb0e78..68adaad81 100644
--- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
index 3f0374aac..332301455 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
index aeefef093..2630461ae 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.cpp
index 79ca59466..c035b7ed7 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnique.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnique.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
index 23f299840..910087a8b 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
index f244dd6eb..23b1abaa5 100644
--- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
index 887640790..954957497 100644
--- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
index f83c5d810..43ebcb418 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
index b164cc3bc..444b04373 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
index b8982fff5..ee1de153f 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
index 9ed440966..2aaef8d04 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
index f7f6f21fd..e7a794a16 100644
--- a/compiler/luci/partition/src/Nodes/CircleVariable.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.cpp
index 8ef274268..d0fc8465d 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhere.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhere.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
index 942f804c2..f17131c94 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.cpp
index 7820aca01..95b77f753 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhile.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhile.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
index bffb7869d..6ee7aba62 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
index 1cb4419db..5cd68355c 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
index 901f31b01..f58eba031 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
index 715042d86..795d88de3 100644
--- a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
index 74c873cb2..f887bc36f 100644
--- a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/PartitionIR.cpp b/compiler/luci/partition/src/PartitionIR.cpp
index 60dc74f89..969fa7092 100644
--- a/compiler/luci/partition/src/PartitionIR.cpp
+++ b/compiler/luci/partition/src/PartitionIR.cpp
@@ -64,7 +64,7 @@ std::unique_ptr<PGroups> PGroups::make_copy(void) const
// note: d_pgroup is now nullptr as it's moved
}
- return std::move(d_pgroups);
+ return d_pgroups;
}
GroupKey PGroups::group_of(luci::CircleNode *node) const
diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp
index 4c3971bd8..aa8a827cd 100644
--- a/compiler/luci/partition/src/PartitionMerge.cpp
+++ b/compiler/luci/partition/src/PartitionMerge.cpp
@@ -255,7 +255,7 @@ std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
}
} while (changed);
- return std::move(d_pgroups);
+ return d_pgroups;
}
} // namespace luci
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp
index eaeacf9c4..2e95f08f7 100644
--- a/compiler/luci/partition/src/PartitionPGroups.cpp
+++ b/compiler/luci/partition/src/PartitionPGroups.cpp
@@ -257,7 +257,7 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
}
}
- return std::move(pgroups);
+ return pgroups;
}
} // namespace luci
diff --git a/compiler/luci/partition/src/PartitionPModules.cpp b/compiler/luci/partition/src/PartitionPModules.cpp
index beaaf6093..251dbea39 100644
--- a/compiler/luci/partition/src/PartitionPModules.cpp
+++ b/compiler/luci/partition/src/PartitionPModules.cpp
@@ -15,7 +15,7 @@
*/
#include "PartitionPModules.h"
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "luci/Service/CircleNodeClone.h"
#include "luci/Log.h"
@@ -156,7 +156,7 @@ std::unique_ptr<loco::Graph> clone_graph(loco::Graph *graph_org, luci::CloneCont
add_graph_output(graph_clone, output_clone);
}
- return std::move(graph);
+ return graph;
}
void clone_recursive_subgraphs(luci::PartedModule &pm, loco::Graph *graph,
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index 5237c6d3f..d9d004db9 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,9 +1,16 @@
nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+nnas_find_package(Fp16Source QUIET)
+
if(NOT FlatBuffers_FOUND)
message(STATUS "FlatBuffers NOT FOUND")
return()
endif(NOT FlatBuffers_FOUND)
+if(NOT Fp16Source_FOUND)
+ message(STATUS "Fp16Source NOT FOUND")
+ return()
+endif(NOT Fp16Source_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
@@ -14,6 +21,7 @@ endif(NOT LUCI_LIBRARY_TYPE)
add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_pass PRIVATE src)
+target_include_directories(luci_pass PRIVATE ${Fp16Source_DIR}/include)
target_include_directories(luci_pass PUBLIC include)
target_link_libraries(luci_pass PUBLIC loco)
target_link_libraries(luci_pass PUBLIC logo_core)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index c803898f6..b94822c35 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -47,8 +47,10 @@ public:
ResolveCustomOpBatchMatMul,
ResolveCustomOpMatMul,
ResolveCustomOpMaxPoolWithArgmax,
+ ResolveCustomOpSplitV,
FoldAddV2,
FoldCast,
+ FoldDensify,
FoldDepthwiseConv2D,
FoldDequantize,
FoldGather,
@@ -61,6 +63,7 @@ public:
ShuffleWeightTo16x1Float32,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
+ ReplaceNonConstFCWithBatchMatMul,
ReplaceSubWithAdd,
SubstitutePackToReshape,
SubstitutePadV2ToPad,
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h
new file mode 100644
index 000000000..8ec81b1d4
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DENSIFY_PASS_H__
+#define __LUCI_FOLD_DENSIFY_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold a Densify operation when its input is a sparse constant
+ *
+ */
+struct FoldDensifyPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldDensifyPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DENSIFY_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h
new file mode 100644
index 000000000..2deb75297
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove redundant dequantize operations
+ */
+struct RemoveRedundantDequantizePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantDequantizePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h
new file mode 100644
index 000000000..19948a31c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove unnecessary Reshape nodes.
+ * @details This class will remove unnecessary pre/post-Reshape nodes.
+ * See https://github.com/Samsung/ONE/issues/9600 for more details.
+ */
+struct RemoveUnnecessaryReshapeNetPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryReshapeNetPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h
new file mode 100644
index 000000000..24e16ec49
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
+#define __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to replace "FC with non-const weight" with Batched MatMul
+ */
+struct ReplaceNonConstFCWithBatchMatMulPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ReplaceNonConstFCWithBatchMatMulPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h
new file mode 100644
index 000000000..d4f0147e8
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
+#define __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to resolve certain custom op of subgraph into splitv op in circle schema.
+ */
+struct ResolveCustomOpSplitVPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ResolveCustomOpSplitVPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 6dbb22d7c..74c569d20 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -20,6 +20,7 @@
#include "luci/Pass/ExpandBroadcastConstPass.h"
#include "luci/Pass/FoldAddV2Pass.h"
#include "luci/Pass/FoldCastPass.h"
+#include "luci/Pass/FoldDensifyPass.h"
#include "luci/Pass/FoldDepthwiseConv2DPass.h"
#include "luci/Pass/FoldDequantizePass.h"
#include "luci/Pass/FoldGatherPass.h"
@@ -43,15 +44,18 @@
#include "luci/Pass/RemoveRedundantTransposePass.h"
#include "luci/Pass/RemoveRedundantQuantizePass.h"
#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
#include "luci/Pass/RemoveUnnecessarySlicePass.h"
#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h"
#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
#include "luci/Pass/ReplaceSubWithAddPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
#include "luci/Pass/SparsifyTensorPass.h"
#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
#include "luci/Pass/SubstitutePackToReshapePass.h"
@@ -127,7 +131,8 @@ bool OptimizeOptionsImpl::query(Algorithm algo)
return true;
}
-void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output)
+// TODO Make a struct for args
+void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output, bool fuse_fc)
{
logo::Phase phase;
@@ -135,6 +140,21 @@ void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_out
phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+ // Resolve custom Ops
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpBatchMatMulPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMatMulPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>());
+
+ // Fuse FullyConnected with Add
+ // Why we perform FuseAddWithFullyConnectedPass before ConvertNCHWToNHWCPass?
+ // FullyConnected Op's layout is not changed in ConvertNCHWToNHWCPass, while
+ // Add Op's layer is changed from NCHW to NHWC.
+ // This disables fusion of Add and FullyConnected after ConvertNCHWToNHWC.
+ if (fuse_fc)
+ phase.emplace_back(std::make_unique<luci::FuseAddWithFullyConnectedPass>());
+
phase.emplace_back(
std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
@@ -190,7 +210,9 @@ void CircleOptimizer::optimize(loco::Graph *g) const
bool preserve_output =
_options->param(Options::AlgorithmParameters::NCHW_to_NHWC_output_shape) != "true";
- convert_nchw_to_nhwc(g, preserve_input, preserve_output);
+ bool fuse_fc = _options->query(Options::Algorithm::FuseAddWithFullyConnected);
+
+ convert_nchw_to_nhwc(g, preserve_input, preserve_output, fuse_fc);
}
/* TRANSFORM DECLARATION BEGIN */
@@ -220,6 +242,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>());
}
+ if (_options->query(Options::Algorithm::ResolveCustomOpSplitV))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>());
+ }
if (_options->query(Options::Algorithm::FuseInstanceNorm))
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
@@ -260,6 +286,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::FoldCastPass>());
}
+ if (_options->query(Options::Algorithm::FoldDensify))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldDensifyPass>());
+ }
if (_options->query(Options::Algorithm::FoldDepthwiseConv2D))
{
phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>());
@@ -307,6 +337,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const
if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape))
{
phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapePass>());
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapeNetPass>());
}
if (_options->query(Options::Algorithm::RemoveUnnecessarySlice))
{
@@ -332,6 +363,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
}
+ if (_options->query(Options::Algorithm::ReplaceNonConstFCWithBatchMatMul))
+ {
+ phase.emplace_back(std::make_unique<luci::ReplaceNonConstFCWithBatchMatMulPass>());
+ }
if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv))
{
phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>());
diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp
index ce38a90b9..9a6550b9f 100644
--- a/compiler/luci/pass/src/CircleQuantizer.cpp
+++ b/compiler/luci/pass/src/CircleQuantizer.cpp
@@ -22,6 +22,7 @@
#include "luci/Pass/RequantizePass.h"
#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
#include "luci/Pass/QuantizePreCheckerPass.h"
#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
@@ -252,8 +253,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const
static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
- static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
- static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "float32"};
+ static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "float32"};
auto input_model_dtype =
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
@@ -434,6 +435,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const
phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+ // Remove redundant Dequantize Ops generated during fake quantization
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>());
// Fold Dequantize Ops generated during fake quantization
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
index ce4f54035..55a29d105 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
@@ -28,6 +28,69 @@
namespace
{
+// Return true if from can be broadcasted to to
+// to's shape is [N, C, H, W]
+bool broadcastable(const luci::CircleConst *from, const luci::CircleNode *to)
+{
+ assert(to->rank() == 4); // FIX_CALLER_UNLESS
+
+ const auto from_rank = from->rank();
+ if (from_rank > 4)
+ return false;
+
+ // Scalar is always broadcastable
+ if (from_rank == 0)
+ return true;
+
+ for (uint32_t i = 1; i <= from_rank; i++)
+ {
+ auto to_index = 4 - i;
+ auto from_index = from_rank - i;
+
+ if (from->dim(from_index).value() != to->dim(to_index).value() and
+ from->dim(from_index).value() != 1)
+ return false;
+ }
+
+ return true;
+}
+
+// Expand node to rank 4
+// node should have rank less than or equal to 4
+void expand_to_rank_4(luci::CircleConst *node)
+{
+ auto original_rank = node->rank();
+
+ assert(original_rank <= 4); // FIX_CALLER_UNLESS
+
+ if (original_rank == 4)
+ return;
+
+ std::vector<uint32_t> original_shape;
+ for (uint32_t i = 0; i < original_rank; i++)
+ {
+ original_shape.emplace_back(node->dim(i).value());
+ }
+
+ node->rank(4);
+ for (uint32_t i = 0; i < (4 - original_rank); i++)
+ node->dim(i) = 1;
+
+ for (uint32_t i = 0; i < original_rank; i++)
+ node->dim(i + (4 - original_rank)) = original_shape.at(i);
+}
+
+bool is_output(const loco::Node *node)
+{
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+ auto opcode = cnode->opcode();
+ if (opcode == luci::CircleOpcode::CIRCLEOUTPUT ||
+ opcode == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ return true;
+
+ return false;
+}
+
bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape)
{
if (not node)
@@ -484,7 +547,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node,
//
// Find MUL with an NCHW pattern described below
// - Input (non-constant) shape : [N, C, H, W]
-// - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1)
+// - Input (constant) shape : broadcastable to [N, C, H, W]
// - Output shape : [N, C, H, W]
bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
luci::CircleConst *&multiplier)
@@ -511,32 +574,12 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod
if (pred_node->rank() != 4)
return false;
- const auto const_rank = multiplier->rank();
- // Support Rank 4 or scalar (rank 0 or 1)
- if (const_rank != 4 && const_rank != 0 && const_rank != 1)
+ if (not broadcastable(multiplier, node))
return false;
- const auto input_cdim = pred_node->dim(1);
- const auto output_cdim = node->dim(1);
-
- if (const_rank == 4)
- {
- bool supported_shape = false;
-
- // Check multiplier is (1, C, 1, 1)
- if (is_same_shape(multiplier, {1, node->dim(1), 1, 1}))
- supported_shape = true;
-
- // Check multiplier is (N, C, H, W)
- if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
- supported_shape = true;
+ expand_to_rank_4(multiplier);
- return supported_shape;
- }
- if (input_cdim == output_cdim)
- return true;
- else
- return false;
+ return true;
}
// We assume ADD with const input is NCHW if,
@@ -569,32 +612,12 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod
if (pred_node->rank() != 4)
return false;
- const auto const_rank = beta->rank();
- // Support Rank 4 or scalar (rank 0 or 1)
- if (const_rank != 4 && const_rank != 0 && const_rank != 1)
+ if (not broadcastable(beta, node))
return false;
- const auto input_cdim = pred_node->dim(1);
- const auto output_cdim = node->dim(1);
-
- if (const_rank == 4)
- {
- bool supported_shape = false;
-
- // Check beta is (1, C, 1, 1)
- if (is_same_shape(beta, {1, node->dim(1), 1, 1}))
- supported_shape = true;
-
- // Check beta is (N, C, H, W)
- if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
- supported_shape = true;
+ expand_to_rank_4(beta);
- return supported_shape;
- }
- if (input_cdim == output_cdim)
- return true;
- else
- return false;
+ return true;
}
// We assume SUB with const input is NCHW if,
@@ -675,6 +698,24 @@ template <class T> bool convert_unary_x(T *node)
return true;
}
+template <class T> bool convert_unary_logits(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->logits());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->logits(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
{
// Default
@@ -742,17 +783,14 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
if (is_NCHW_with_const(node, pred_node, beta))
{
+ assert(beta->rank() == 4); // FIX is_NCHW_with_const unless
+ auto nhwc_const = create_NHWC_from_NCHW(beta);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+
auto pre_trans = create_pre_transpose(node);
pre_trans->a(pred_node);
-
- if (beta->rank() == 4)
- {
- auto nhwc_const = create_NHWC_from_NCHW(beta);
- if (nhwc_const == nullptr)
- return false;
- node->y(nhwc_const);
- }
-
node->x(pre_trans);
}
else if (beta == nullptr)
@@ -816,6 +854,11 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
bool visit(luci::CircleLogistic *node) { return convert_unary_x<luci::CircleLogistic>(node); }
+ bool visit(luci::CircleLogSoftmax *node)
+ {
+ return convert_unary_logits<luci::CircleLogSoftmax>(node);
+ }
+
bool visit(luci::CircleMaximum *node)
{
luci::CircleNode *pred_node = nullptr;
@@ -954,15 +997,15 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
if (is_NCHW_with_const(node, pred_node, multiplier))
{
+ assert(multiplier->rank() == 4); // FIX is_NCHW_with_const unless
+ auto nhwc_const = create_NHWC_from_NCHW(multiplier);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+
auto pre_trans = create_pre_transpose(node);
pre_trans->a(pred_node);
node->x(pre_trans);
-
- if (multiplier->rank() == 4)
- {
- auto nhwc_const = create_NHWC_from_NCHW(multiplier);
- node->y(nhwc_const);
- }
}
else if (multiplier == nullptr)
{
@@ -1049,12 +1092,127 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
return true;
}
+ // TODO Reduce duplicate code with CircleMean
+ bool visit(luci::CircleReduceMax *node)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ if (input->rank() != 4)
+ return false;
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+ if (not rindices)
+ return false;
+
+ auto nhwc_rindices = create_NHWC_rindices(rindices);
+ if (not nhwc_rindices)
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(input);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->reduction_indices(nhwc_rindices);
+
+ if (node->keep_dims())
+ {
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // The below codes handle the cases where node->keep_dims() == false
+ // 1D output never needs a transpose
+ if (node->rank() <= 1)
+ return true;
+
+ std::vector<bool> reduced_dims_nhwc(4, false);
+ uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+ for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+ {
+ reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+ }
+
+ // if channel dimension has been reduced, we don't need a transpose
+ if (reduced_dims_nhwc[3])
+ return true;
+
+ // likewise, if both space dimensions are reduced, no transpose is needed
+ if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+ return true;
+
+ std::vector<int32_t> post_trans_ind;
+ // case 1: only N is reduced
+ if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+ post_trans_ind = {2, 0, 1};
+
+ // case 2: only H or W is reduced
+ if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+ post_trans_ind = {0, 2, 1};
+
+ // case 3: N and either H or W are reduced
+ if (num_reduced_indices == 2)
+ post_trans_ind = {1, 0};
+
+ auto post_trans = create_Nd_transpose(node, post_trans_ind);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
bool visit(luci::CircleRsqrt *node) { return convert_unary_x<luci::CircleRsqrt>(node); }
+ bool visit(luci::CircleSoftmax *node) { return convert_unary_logits<luci::CircleSoftmax>(node); }
+
+ bool visit(luci::CircleSplitV *node)
+ {
+ // Change split dimension
+ auto axis = dynamic_cast<luci::CircleConst *>(node->split_dim());
+ if (not axis)
+ return false;
+
+ if (axis->dtype() != loco::DataType::S32)
+ return false;
+
+ if (axis->size<loco::DataType::S32>() != 1)
+ return false;
+
+ axis->at<loco::DataType::S32>(0) = nchw_axis_to_nhwc(axis->at<loco::DataType::S32>(0));
+
+ // Insert pre-transpose
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Insert post-transposes
+ for (auto succ : loco::succs(node))
+ {
+ auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ);
+
+ auto post_trans = create_post_transpose(svo);
+ loco::replace(svo).with(post_trans);
+ post_trans->a(svo);
+ }
+
+ return true;
+ }
+
bool visit(luci::CircleSquaredDifference *node)
{
// TODO support CircleConst input
@@ -1195,6 +1353,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
// pre-Transpose --- [intermediate Ops] --- post-Transpose
// |
// +--[intermediate Ops] --- post-Transpose
+ //
+ // NOTE Intermediate Ops SHOULD NOT contain pre-Transpose/Reshape
for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
if (has_data_format(node))
@@ -1202,25 +1362,51 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
if (is_pre_transpose(node) || is_pre_reshape(node))
{
+ std::set<loco::Node *> intermediate;
+
+ // Variable to check intermediate Ops contain pre-Transpose/Reshape
+ bool has_pre = false;
+
+ // Variable to check the pattern is closed with post-Transpose/Reshape
+ bool is_closed = true;
+
// For recursive call of lambda
- std::function<void(loco::Node *)> set_data_format_to_succs;
- set_data_format_to_succs = [&](loco::Node *n) {
+ std::function<void(loco::Node *)> collect_intermediate;
+ collect_intermediate = [&](loco::Node *n) {
for (auto succ : loco::succs(n))
{
// Exit condition
if (is_post_transpose(succ) || is_post_reshape(succ))
continue;
- if (not has_data_format(succ))
+ if (is_pre_transpose(succ) || is_pre_reshape(succ))
+ {
+ has_pre = true;
+ break;
+ }
+
+ if (is_output(succ))
{
- set_data_format(succ, DataFormat::NHWC);
+ is_closed = false;
+ break;
}
- set_data_format_to_succs(succ);
+ intermediate.emplace(succ);
+
+ collect_intermediate(succ);
}
};
- set_data_format_to_succs(node);
+ collect_intermediate(node);
+
+ if (has_pre or not is_closed)
+ continue;
+
+ for (auto inter : intermediate)
+ {
+ if (not has_data_format(inter))
+ set_data_format(inter, DataFormat::NHWC);
+ }
}
}
@@ -1248,6 +1434,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
case luci::CircleOpcode::ELU:
case luci::CircleOpcode::LEAKY_RELU:
case luci::CircleOpcode::LOGISTIC:
+ case luci::CircleOpcode::LOG_SOFTMAX:
case luci::CircleOpcode::MAXIMUM:
case luci::CircleOpcode::MEAN:
case luci::CircleOpcode::MINIMUM:
@@ -1255,9 +1442,12 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
case luci::CircleOpcode::NEG:
case luci::CircleOpcode::PAD:
case luci::CircleOpcode::PADV2:
+ case luci::CircleOpcode::REDUCE_MAX:
case luci::CircleOpcode::RELU:
case luci::CircleOpcode::RELU6:
case luci::CircleOpcode::RSQRT:
+ case luci::CircleOpcode::SOFTMAX:
+ case luci::CircleOpcode::SPLIT_V:
case luci::CircleOpcode::SQUARED_DIFFERENCE:
case luci::CircleOpcode::SUB:
if (!has_data_format(node))
@@ -1296,7 +1486,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
if (circle_node->rank() != 4)
{
// TODO replace the check above with the input rank check, and remove the condition below
- if (not dynamic_cast<luci::CircleMean *>(node))
+ if (not dynamic_cast<luci::CircleMean *>(node) and
+ not dynamic_cast<luci::CircleReduceMax *>(node))
continue;
}
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
index dd81d1380..6bb3d3268 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
@@ -16,6 +16,8 @@
#include <logo/Phase.h>
+#include <luci/test/TestIOGraph.h>
+
#include "luci/Pass/ConvertNCHWToNHWCPass.h"
#include "luci/Pass/CircleShapeInferencePass.h"
@@ -23,6 +25,8 @@
#include <gtest/gtest.h>
+using namespace luci::test;
+
namespace
{
@@ -202,6 +206,173 @@ public:
luci::CircleConst *post_shape = nullptr;
};
+/**
+ * Graph with pre-Reshape but no post-Transpose/Reshape.
+ *
+ * BEFORE
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Relu]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Pre-Transpose]
+ * |
+ * [Relu]
+ * |
+ * [Post-Transpose]
+ * |
+ * [Output]
+ */
+class NoPostReshapeGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+ relu->features(pre_reshape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+
+ return relu;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+};
+
+/**
+ * Graph with two pre-Reshapes
+ *
+ * BEFORE
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Relu]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Post-Reshape]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Pre-Transpose]
+ * |
+ * [Relu]
+ * |
+ * [Post-Transpose]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Post-Reshape]
+ * |
+ * [Output]
+ */
+class ReluNotClosedGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_reshape_2 = g.nodes()->create<luci::CircleReshape>();
+ post_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+ pre_shape_2 = g.nodes()->create<luci::CircleConst>();
+ post_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+ pre_shape_2->dtype(loco::DataType::S32);
+ post_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+ pre_shape_2->shape({4});
+ post_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_shape_2->size<loco::DataType::S32>(4);
+ pre_shape_2->at<loco::DataType::S32>(0) = 1;
+ pre_shape_2->at<loco::DataType::S32>(1) = 4;
+ pre_shape_2->at<loco::DataType::S32>(2) = channel_size;
+ pre_shape_2->at<loco::DataType::S32>(3) = 4;
+
+ post_shape->size<loco::DataType::S32>(4);
+ post_shape->at<loco::DataType::S32>(0) = 1;
+ post_shape->at<loco::DataType::S32>(1) = 4;
+ post_shape->at<loco::DataType::S32>(2) = 4;
+ post_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+
+ relu->features(pre_reshape);
+
+ pre_reshape_2->tensor(relu);
+ pre_reshape_2->shape(pre_shape_2);
+
+ post_reshape->tensor(pre_reshape_2);
+ post_reshape->shape(post_shape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+ pre_reshape->name("pre-reshape-2");
+ post_reshape->name("post-reshape");
+
+ return post_reshape;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleReshape *pre_reshape_2 = nullptr;
+ luci::CircleReshape *post_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+ luci::CircleConst *pre_shape_2 = nullptr;
+ luci::CircleConst *post_shape = nullptr;
+};
+
class AddScalarGraph final : public SimpleGraph
{
protected:
@@ -312,6 +483,22 @@ public:
luci::CircleLogistic *logistic = nullptr;
};
+class LogSoftmaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ log_softmax = g.nodes()->create<luci::CircleLogSoftmax>();
+ log_softmax->logits(input);
+ log_softmax->name("log_softmax");
+
+ return log_softmax;
+ }
+
+public:
+ luci::CircleLogSoftmax *log_softmax = nullptr;
+};
+
class MaximumGraph final : public SimpleGraph
{
protected:
@@ -642,6 +829,51 @@ public:
luci::CircleConst *const_value = nullptr;
};
+class ReduceMaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ rm = g.nodes()->create<luci::CircleReduceMax>();
+ rindices = g.nodes()->create<luci::CircleConst>();
+
+ rm->dtype(loco::DataType::FLOAT32);
+ rindices->dtype(loco::DataType::S32);
+
+ rm->shape(_shape);
+ rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+ rindices->size<loco::DataType::S32>(_axes.size());
+ for (uint32_t i = 0; i < _axes.size(); ++i)
+ {
+ rindices->at<loco::DataType::S32>(i) = _axes[i];
+ }
+
+ rm->input(input);
+ rm->reduction_indices(rindices);
+ rm->keep_dims(_keep_dims);
+
+ rm->name("reduce_max");
+ rindices->name("rindices");
+
+ return rm;
+ }
+
+public:
+ void keep_dims(bool val) { _keep_dims = val; }
+ void axes(std::vector<int32_t> val) { _axes = val; }
+ void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+ luci::CircleReduceMax *rm = nullptr;
+ luci::CircleConst *rindices = nullptr;
+
+private:
+ bool _keep_dims = true;
+ std::vector<int32_t> _axes = {2, 3};
+ std::initializer_list<uint32_t> _shape = {1, 16, 1, 1};
+};
+
class ReluGraph final : public SimpleGraph
{
protected:
@@ -690,6 +922,111 @@ public:
luci::CircleRsqrt *rsqrt = nullptr;
};
+class SoftmaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ softmax = g.nodes()->create<luci::CircleSoftmax>();
+ softmax->logits(input);
+ softmax->name("softmax");
+
+ return softmax;
+ }
+
+public:
+ luci::CircleSoftmax *softmax = nullptr;
+};
+
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // CircleCustom(SplitV)
+ _splitv = g->nodes()->create<luci::CircleSplitV>();
+ _splitv->shape({1, 2, 2, 192});
+ _splitv->dtype(loco::DataType::FLOAT32);
+ _splitv->name("splitv");
+
+ // CircleConst
+ auto size_splits = g->nodes()->create<luci::CircleConst>();
+ size_splits->dtype(loco::DataType::S32);
+ size_splits->shape({3});
+ size_splits->size<loco::DataType::S32>(3);
+ size_splits->at<loco::DataType::S32>(0) = 32;
+ size_splits->at<loco::DataType::S32>(1) = 32;
+ size_splits->at<loco::DataType::S32>(2) = 128;
+
+ // CircleConst
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(0);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->scalar<loco::DataType::S32>() = 3;
+
+ _splitv->size_splits(size_splits);
+ _splitv->split_dim(split_dim);
+ _splitv->num_split(3);
+
+ // CircleSplitVOut
+ _splitv_out1 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out1->shape({1, 2, 2, 32});
+ _splitv_out1->dtype(loco::DataType::FLOAT32);
+ _splitv_out1->index(0);
+ _splitv_out1->input(_splitv);
+ _splitv_out1->name("splitv_out1");
+
+ // CircleSplitVOut
+ _splitv_out2 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out2->shape({1, 2, 2, 32});
+ _splitv_out2->dtype(loco::DataType::FLOAT32);
+ _splitv_out2->index(1);
+ _splitv_out2->input(_splitv);
+ _splitv_out2->name("splitv_out2");
+
+ // CircleSplitVOut
+ _splitv_out3 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out3->shape({1, 2, 2, 128});
+ _splitv_out3->dtype(loco::DataType::FLOAT32);
+ _splitv_out3->index(2);
+ _splitv_out3->input(_splitv);
+ _splitv_out3->name("splitv_out3");
+ }
+
+public:
+ luci::CircleSplitV *splitv() { return _splitv; }
+
+protected:
+ luci::CircleSplitV *_splitv = nullptr;
+ luci::CircleSplitVOut *_splitv_out1 = nullptr;
+ luci::CircleSplitVOut *_splitv_out2 = nullptr;
+ luci::CircleSplitVOut *_splitv_out3 = nullptr;
+};
+
+class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIGraphlet::init(g(), {1, 2, 2, 192});
+ TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}});
+ SplitVGraphlet::init(g());
+
+ // connect graph
+ _splitv->input(input());
+
+ output(0)->from(_splitv_out1);
+ output(1)->from(_splitv_out2);
+ output(2)->from(_splitv_out3);
+ }
+};
+
class SquaredDifferenceGraph final : public SimpleGraph
{
protected:
@@ -929,8 +1266,11 @@ TEST(ConvertNCHWToNHWC, AddScalar)
auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
EXPECT_NE(nullptr, new_beta);
- EXPECT_EQ(1, new_beta->rank());
+ EXPECT_EQ(4, new_beta->rank());
EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(1, new_beta->dim(3).value());
check_pre_trans(g.output->from());
}
@@ -1017,6 +1357,26 @@ TEST(ConvertNCHWToNHWC, Logistic)
EXPECT_EQ(16, g.logistic->dim(3).value());
}
+TEST(ConvertNCHWToNHWC, LogSoftmax)
+{
+ LogSoftmaxGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.log_softmax->logits());
+
+ auto log_softmax_succs = loco::succs(g.log_softmax);
+ EXPECT_EQ(1, log_softmax_succs.size());
+ check_post_trans(*log_softmax_succs.begin());
+
+ // Check log_softmax shape
+ EXPECT_EQ(1, g.log_softmax->dim(0).value());
+ EXPECT_EQ(4, g.log_softmax->dim(1).value());
+ EXPECT_EQ(4, g.log_softmax->dim(2).value());
+ EXPECT_EQ(16, g.log_softmax->dim(3).value());
+}
+
TEST(ConvertNCHWToNHWC, Maximum)
{
MaximumGraph g;
@@ -1265,8 +1625,11 @@ TEST(ConvertNCHWToNHWC, MulScalar)
auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
EXPECT_NE(nullptr, new_multiplier);
- EXPECT_EQ(1, new_multiplier->rank());
+ EXPECT_EQ(4, new_multiplier->rank());
EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(1, new_multiplier->dim(1).value());
+ EXPECT_EQ(1, new_multiplier->dim(2).value());
+ EXPECT_EQ(1, new_multiplier->dim(3).value());
check_pre_trans(g.output->from());
}
@@ -1451,6 +1814,85 @@ TEST(ConvertNCHWToNHWC, Preserve_Input_Output)
}
}
+TEST(ConvertNCHWToNHWC, ReduceMax)
+{
+ ReduceMaxGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ check_post_trans(*rm_succs.begin());
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(2, new_rindices->dim(0).value());
+ EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMax_keep_dims_false)
+{
+ struct TC
+ {
+ std::vector<int32_t> nchw_ind;
+ std::vector<int32_t> nhwc_ind;
+ std::initializer_list<uint32_t> shape;
+ bool needs_transpose = false;
+ };
+
+ uint32_t n = 1;
+ uint32_t c = 16;
+ uint32_t h = 4;
+ uint32_t w = 4;
+
+ std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false},
+ {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true},
+ {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true},
+ {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false},
+ {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false},
+ {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+ for (auto &tc : test_cases)
+ {
+ ReduceMaxGraph g;
+ g.keep_dims(false);
+ g.axes(tc.nchw_ind);
+ g.shape(tc.shape);
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ if (tc.needs_transpose)
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin()));
+ }
+ else
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin()));
+ }
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+ for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+ {
+ EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+ }
+ }
+}
+
TEST(ConvertNCHWToNHWC, Relu)
{
ReluGraph g;
@@ -1511,6 +1953,57 @@ TEST(ConvertNCHWToNHWC, Rsqrt)
EXPECT_EQ(16, g.rsqrt->dim(3).value());
}
+TEST(ConvertNCHWToNHWC, Softmax)
+{
+ SoftmaxGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.softmax->logits());
+
+ auto softmax_succs = loco::succs(g.softmax);
+ EXPECT_EQ(1, softmax_succs.size());
+ check_post_trans(*softmax_succs.begin());
+
+ // Check softmax shape
+ EXPECT_EQ(1, g.softmax->dim(0).value());
+ EXPECT_EQ(4, g.softmax->dim(1).value());
+ EXPECT_EQ(4, g.softmax->dim(2).value());
+ EXPECT_EQ(16, g.softmax->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, SplitV)
+{
+ SplitVGraph g;
+ g.init();
+
+ run_phase(g.g(), true, true);
+
+ check_pre_trans(g.splitv()->input());
+
+ auto splitv_succs = loco::succs(g.splitv());
+ for (auto svo : loco::succs(g.splitv()))
+ {
+ for (auto succ : loco::succs(svo))
+ {
+ check_post_trans(succ);
+ }
+ }
+
+ // Check splitv() shape
+ EXPECT_EQ(1, g.splitv()->dim(0).value());
+ EXPECT_EQ(2, g.splitv()->dim(1).value());
+ EXPECT_EQ(192, g.splitv()->dim(2).value());
+ EXPECT_EQ(2, g.splitv()->dim(3).value());
+
+ // Check axis
+ auto axis = dynamic_cast<luci::CircleConst *>(g.splitv()->split_dim());
+ EXPECT_NE(nullptr, axis);
+ EXPECT_EQ(1, axis->size<loco::DataType::S32>());
+ EXPECT_EQ(2, axis->at<loco::DataType::S32>(0));
+}
+
TEST(ConvertNCHWToNHWC, SquaredDifference)
{
SquaredDifferenceGraph g;
@@ -1602,3 +2095,31 @@ TEST(ConvertNCHWToNHWC, SubScalar)
check_pre_trans(g.output->from());
}
+
+TEST(ConvertNCHWToNHWC, Not_Closed_Case1_NEG)
+{
+ NoPostReshapeGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+}
+
+TEST(ConvertNCHWToNHWC, Not_Closed_Case2_NEG)
+{
+ ReluNotClosedGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+}
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
index 11970fff5..72f590135 100644
--- a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
+++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
@@ -184,8 +184,63 @@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
// For non-const activation, insert Quantize-Dequantize Ops
// and dequantize the node
- void visit(luci::CircleConv2D *node) { fq_activation(node); }
void visit(luci::CircleAdd *node) { fq_activation(node); }
+ void visit(luci::CircleAveragePool2D *node) { fq_activation(node); }
+ void visit(luci::CircleBatchMatMul *node) { fq_activation(node); }
+ void visit(luci::CircleConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleDepthwiseConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleDiv *node) { fq_activation(node); }
+ void visit(luci::CircleFullyConnected *node) { fq_activation(node); }
+ void visit(luci::CircleInstanceNorm *node) { fq_activation(node); }
+ void visit(luci::CircleLeakyRelu *node) { fq_activation(node); }
+ void visit(luci::CircleLogistic *node) { fq_activation(node); }
+ void visit(luci::CircleLogSoftmax *node) { fq_activation(node); }
+ void visit(luci::CircleMaxPool2D *node) { fq_activation(node); }
+ void visit(luci::CircleMul *node) { fq_activation(node); }
+ void visit(luci::CircleNeg *node) { fq_activation(node); }
+ void visit(luci::CirclePad *node) { fq_activation(node); }
+ void visit(luci::CirclePRelu *node) { fq_activation(node); }
+ void visit(luci::CircleMean *node) { fq_activation(node); }
+ void visit(luci::CircleReduceMax *node) { fq_activation(node); }
+ void visit(luci::CircleRelu *node) { fq_activation(node); }
+ void visit(luci::CircleRelu6 *node) { fq_activation(node); }
+ void visit(luci::CircleResizeBilinear *node) { fq_activation(node); }
+ void visit(luci::CircleResizeNearestNeighbor *node) { fq_activation(node); }
+ void visit(luci::CircleRsqrt *node) { fq_activation(node); }
+ void visit(luci::CircleSoftmax *node) { fq_activation(node); }
+ void visit(luci::CircleSqrt *node) { fq_activation(node); }
+ void visit(luci::CircleTanh *node) { fq_activation(node); }
+ void visit(luci::CircleTransposeConv *node) { fq_activation(node); }
+
+ // For Ops that do not change the value of input, do nothing
+ // (dtype will be automatically updated by type inference)
+ void visit(luci::CircleCast *) {}
+ void visit(luci::CircleConcatenation *) {}
+ void visit(luci::CircleGather *) {}
+ void visit(luci::CircleSlice *) {}
+ void visit(luci::CircleStridedSlice *) {}
+ void visit(luci::CircleReshape *) {}
+ void visit(luci::CircleSplit *) {}
+ void visit(luci::CircleSplitOut *) {}
+ void visit(luci::CircleSplitV *) {}
+ void visit(luci::CircleSplitVOut *) {}
+ void visit(luci::CircleTranspose *) {}
+
+ // For Ops that return index, fake quantization is unnecessary
+ void visit(luci::CircleArgMax *) {}
+
+ // Virtual node
+ void visit(luci::CircleOutputExclude *) {}
+
+ void visit(luci::CircleQuantize *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ insert_dequantize(node);
+ }
+
+ // Dequantize Op does nothing in fp32 model
+ void visit(luci::CircleDequantize *) {}
};
#undef RETURN_UNLESS
diff --git a/compiler/luci/pass/src/FoldDensifyPass.cpp b/compiler/luci/pass/src/FoldDensifyPass.cpp
new file mode 100644
index 000000000..5ddc743e5
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDensifyPass.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDensifyPass.h"
+#include "helpers/SparsityFormatConverter.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+bool is_foldable_const(luci::CircleConst *node)
+{
+ if (node->sparsityparam() == nullptr)
+ return false;
+
+ if (node->dtype() == loco::DataType::FLOAT32)
+ return true;
+ if (node->dtype() == loco::DataType::FLOAT16)
+ return true;
+
+ return false;
+}
+
+luci::CircleConst *densified_const_node(luci::CircleConst *const_node)
+{
+ assert(const_node->sparsityparam());
+
+ auto name = const_node->name();
+ assert(name.length() > 0);
+ auto g = const_node->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(const_node->dtype());
+ new_const_node->rank(const_node->rank());
+
+ uint32_t dim_size = 1;
+ std::vector<int> dense_shape;
+ for (uint32_t i = 0; i < new_const_node->rank(); ++i)
+ {
+ assert(const_node->dim(i).known());
+ new_const_node->dim(i) = const_node->dim(i);
+
+ uint32_t value = const_node->dim(i).value();
+ dim_size *= value;
+ dense_shape.emplace_back(static_cast<int32_t>(value));
+ }
+
+ if (const_node->dtype() == loco::DataType::FLOAT32)
+ new_const_node->size<loco::DataType::FLOAT32>(dim_size);
+ else
+ {
+ assert(const_node->dtype() == loco::DataType::FLOAT16);
+ new_const_node->size<loco::DataType::FLOAT16>(dim_size);
+ }
+
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->name(name + "_DS");
+
+ if (const_node->dtype() == loco::DataType::FLOAT32)
+ {
+ auto const_items = const_node->size<loco::DataType::FLOAT32>();
+ auto f_data = std::make_unique<float[]>(const_items);
+ for (size_t i = 0; i < const_items; ++i)
+ f_data[i] = const_node->at<loco::DataType::FLOAT32>(i);
+
+ sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam());
+ sparsity::FormatConverter<float> converter(dense_shape, sp);
+ converter.SparseToDense(f_data.get());
+ const auto &data_dense = converter.GetData();
+ assert(data_dense.size() == dim_size);
+
+ for (uint32_t i = 0; i < dim_size; ++i)
+ new_const_node->at<loco::DataType::FLOAT32>(i) = data_dense[i];
+
+ luci::freeTfLiteSparsity(sp);
+ }
+ else
+ {
+ assert(const_node->dtype() == loco::DataType::FLOAT16);
+
+ auto const_items = const_node->size<loco::DataType::FLOAT16>();
+ auto f_data = std::make_unique<uint16_t[]>(const_items);
+ for (size_t i = 0; i < const_items; ++i)
+ f_data[i] = const_node->at<loco::DataType::FLOAT16>(i);
+
+ // Primitive type for FLOAT16 is UINT16
+ sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam());
+ sparsity::FormatConverter<uint16_t> converter(dense_shape, sp);
+ converter.SparseToDense(f_data.get());
+ const auto &data_dense = converter.GetData();
+ assert(data_dense.size() == dim_size);
+ for (uint32_t i = 0; i < dim_size; ++i)
+ new_const_node->at<loco::DataType::FLOAT16>(i) = data_dense[i];
+
+ luci::freeTfLiteSparsity(sp);
+ }
+
+ return new_const_node;
+}
+
+/**
+ * @brief Fold Densify if input is Sparse Constant
+ */
+bool fold_densify(luci::CircleDensify *densify)
+{
+ auto const_input = dynamic_cast<luci::CircleConst *>(densify->input());
+ if (not const_input)
+ return false;
+
+ if (not is_foldable_const(const_input))
+ return false;
+
+ auto dense_const = densified_const_node(const_input);
+ assert(dense_const);
+
+ loco::replace(densify).with(dense_const);
+ luci::add_origin(dense_const, luci::composite_origin(
+ {luci::get_origin(densify), luci::get_origin(const_input)}));
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleConst](sparse)
+ * |
+ * [CircleDensify]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ *
+ * [CircleConst](dense) [CircleConst](sparse)
+ * | |
+ * [CircleNode] [CircleDensify]
+ * |
+ */
+bool FoldDensifyPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto densify = dynamic_cast<luci::CircleDensify *>(node))
+ {
+ if (fold_densify(densify))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDensifyPass.test.cpp b/compiler/luci/pass/src/FoldDensifyPass.test.cpp
new file mode 100644
index 000000000..2f9736f49
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDensifyPass.test.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDensifyPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class FoldDensifyPassGraph : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldDensifyPassGraph(std::initializer_list<uint32_t> shape)
+ : luci::ConstantFoldingAddTestGraph(shape, loco::DataType::FLOAT32)
+ {
+ _densify = _g.nodes()->create<luci::CircleDensify>();
+ _x = _g.nodes()->create<luci::CircleConst>();
+
+ _densify->dtype(loco::DataType::FLOAT32);
+ _x->dtype(loco::DataType::FLOAT32);
+
+ _densify->shape(shape);
+ _x->shape(shape);
+
+ _densify->input(_x);
+
+ _densify->name("densify");
+ _x->name("x");
+ }
+
+ loco::Node *createFoldedPattern() override { return _densify; }
+
+public:
+ void fill_const_dense(void)
+ {
+ uint32_t num_elems = 1;
+ for (uint32_t r = 0; r < _x->rank(); ++r)
+ num_elems *= _x->dim(r).value();
+
+ _x->size<loco::DataType::FLOAT32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ _x->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i + 1);
+ }
+
+ void fill_const_sparse(void)
+ {
+ // fill 4x4 of
+ // [[1 0 0 0]
+ // [0 2 0 0]
+ // [0 0 3 0]
+ // [0 0 0 4]]
+
+ // values of 1.0, 2.0, 3.0, 4.0
+ uint32_t udata[] = {0x3f800000, 0x40000000, 0x40400000, 0x40800000};
+ float *fdata = reinterpret_cast<float *>(udata);
+
+ _x->size<loco::DataType::FLOAT32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ _x->at<loco::DataType::FLOAT32>(i) = fdata[i];
+
+ auto sparsityparam = std::make_unique<luci::SparsityParam>();
+ sparsityparam->traversal_order = std::vector<int32_t>({0, 1});
+ sparsityparam->block_map = std::vector<int32_t>({});
+
+ auto dm0 = luci::DimMetaData(luci::DimensionType::DENSE, 4);
+
+ std::vector<int32_t> as_vec = {0, 1, 2, 3, 4};
+ std::vector<int32_t> ai_vec = {0, 1, 2, 3};
+ auto as = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, as_vec);
+ auto ai = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, ai_vec);
+ auto dm1 = luci::DimMetaData(luci::DimensionType::SPARSE_CSR, 0, as, ai);
+ sparsityparam->dim_metadata.emplace_back(dm0);
+ sparsityparam->dim_metadata.emplace_back(dm1);
+
+ _x->sparsityparam(std::move(sparsityparam));
+ }
+
+protected:
+ luci::CircleDensify *_densify = nullptr;
+ luci::CircleConst *_x = nullptr;
+};
+
+class FoldDensifyPassGraphTest : public FoldDensifyPassGraph, public ::testing::Test
+{
+public:
+ FoldDensifyPassGraphTest() : FoldDensifyPassGraph({4, 4}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldDensifyPassGraph, name)
+{
+ luci::FoldDensifyPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldDensifyPassGraphTest, no_sparsity_param_NEG)
+{
+ fill_const_dense();
+
+ luci::FoldDensifyPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(FoldDensifyPassGraphTest, sparsity_param)
+{
+ fill_const_sparse();
+
+ luci::FoldDensifyPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(4, folded_const->dim(0).value());
+ EXPECT_EQ(4, folded_const->dim(1).value());
+ EXPECT_EQ(16, folded_const->size<loco::DataType::FLOAT32>());
+ for (int y = 0; y < 4; ++y)
+ {
+ for (int x = 0; x < 4; ++x)
+ {
+ float ovalue = folded_const->at<loco::DataType::FLOAT32>(y * 4 + x);
+ float fvalue = 0.0;
+ if (x == y)
+ {
+ // diagonal position
+ fvalue = static_cast<float>(y + 1);
+ }
+ EXPECT_EQ(fvalue, ovalue);
+ }
+ }
+}
diff --git a/compiler/luci/pass/src/FoldDequantizePass.cpp b/compiler/luci/pass/src/FoldDequantizePass.cpp
index 3dd4f8cea..b6526deb0 100644
--- a/compiler/luci/pass/src/FoldDequantizePass.cpp
+++ b/compiler/luci/pass/src/FoldDequantizePass.cpp
@@ -19,6 +19,8 @@
#include <luci/IR/CircleNodes.h>
#include <luci/Profile/CircleNodeOrigin.h>
+#include <fp16.h>
+
namespace
{
@@ -32,6 +34,9 @@ bool is_hybrid_kernel_supported(loco::Node *node)
bool is_foldable_const(luci::CircleConst *node)
{
+ if (node->dtype() == loco::DataType::FLOAT16)
+ return true;
+
if (node->quantparam() == nullptr)
return false;
@@ -39,17 +44,18 @@ bool is_foldable_const(luci::CircleConst *node)
return true;
if (node->dtype() == loco::DataType::U8)
return true;
+ if (node->dtype() == loco::DataType::S16)
+ return true;
+ if (node->dtype() == loco::DataType::S32)
+ return true;
+ if (node->dtype() == loco::DataType::S64)
+ return true;
return false;
}
luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node)
{
- if (const_node->quantparam() == nullptr)
- {
- throw std::runtime_error("Given constant node has no quantization parameter");
- }
-
auto name = const_node->name();
assert(name.length() > 0);
auto g = const_node->graph();
@@ -67,38 +73,70 @@ luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node)
new_const_node->shape_status(luci::ShapeStatus::VALID);
new_const_node->name(name + "_DQ");
+ if (const_node->dtype() == loco::DataType::FLOAT16)
+ {
+ for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i)
+ {
+ auto raw = const_node->at<loco::DataType::FLOAT16>(i);
+ new_const_node->at<loco::DataType::FLOAT32>(i) = fp16_ieee_to_fp32_value(raw);
+ }
+ return new_const_node;
+ }
+
+ if (const_node->quantparam() == nullptr)
+ {
+ throw std::runtime_error("Given constant node has no quantization parameter");
+ }
+
const int32_t q_dim = const_node->quantparam()->quantized_dimension;
- const int32_t q_dim_value = const_node->dim(q_dim).value();
+ // For scalar, q_dim_value is 1
+ // For non-scalar, q_dim_value is the size of quantized dimension
+ const int32_t q_dim_value = const_node->rank() == 0 ? 1 : const_node->dim(q_dim).value();
int32_t right_count = q_dim_value;
for (uint32_t i = q_dim + 1; i < const_node->rank(); ++i)
right_count *= const_node->dim(i).value();
- if (const_node->dtype() == loco::DataType::S8)
+ for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i)
{
- for (uint32_t i = 0; i < const_node->size<loco::DataType::S8>(); ++i)
- {
- uint32_t qd = (i % right_count) / (right_count / q_dim_value);
- if (qd >= const_node->quantparam()->zerop.size())
- qd = 0;
+ uint32_t qd = (i % right_count) / (right_count / q_dim_value);
+ if (qd >= const_node->quantparam()->zerop.size())
+ qd = 0;
- new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
- }
- }
- else
- {
- for (uint32_t i = 0; i < const_node->size<loco::DataType::U8>(); ++i)
+ switch (const_node->dtype())
{
- uint32_t qd = (i % right_count) / (right_count / q_dim_value);
- if (qd >= const_node->quantparam()->zerop.size())
- qd = 0;
-
- new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)((int)const_node->at<loco::DataType::U8>(i) -
- const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
+ case loco::DataType::S8:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S16:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S16>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S32:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S32>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S64:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S64>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::U8:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::U8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ default:
+ throw std::runtime_error("Not supported dtype for FoldDequantizePass");
}
}
@@ -160,7 +198,7 @@ bool FoldDequantizePass::run(loco::Graph *g)
{
bool changed = false;
- for (auto node : loco::all_nodes(g))
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
if (auto circle_dequant = dynamic_cast<luci::CircleDequantize *>(node))
{
diff --git a/compiler/luci/pass/src/FoldDequantizePass.test.cpp b/compiler/luci/pass/src/FoldDequantizePass.test.cpp
index d82a7bc87..fb5b6adc0 100644
--- a/compiler/luci/pass/src/FoldDequantizePass.test.cpp
+++ b/compiler/luci/pass/src/FoldDequantizePass.test.cpp
@@ -15,12 +15,389 @@
*/
#include "luci/Pass/FoldDequantizePass.h"
+#include "PassTestGraphs.h"
#include <gtest/gtest.h>
+namespace
+{
+
+template <loco::DataType DT>
+class FoldDequantizeTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ FoldDequantizeTest() : luci::ConstantFoldingAddTestGraph({2, 2, 2}, DT) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _dequantize = _g.nodes()->create<luci::CircleDequantize>();
+ _input = _g.nodes()->create<luci::CircleConst>();
+
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _input->dtype(DT);
+
+ _input->shape({2, 2, 2});
+
+ _input->size<DT>(8);
+ _input->at<DT>(0) = 0;
+ _input->at<DT>(1) = 1;
+ _input->at<DT>(2) = 2;
+ _input->at<DT>(3) = 3;
+ _input->at<DT>(4) = 4;
+ _input->at<DT>(5) = 5;
+ _input->at<DT>(6) = 6;
+ _input->at<DT>(7) = 7;
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->quantized_dimension = 1;
+ qparam->scale.push_back(5.0);
+ qparam->scale.push_back(10.0);
+ qparam->zerop.push_back(1);
+ qparam->zerop.push_back(2);
+ _input->quantparam(std::move(qparam));
+
+ _dequantize->input(_input);
+
+ _dequantize->name("dequantize");
+ _input->name("input");
+
+ return _dequantize;
+ }
+
+ void createScalarPattern()
+ {
+ _input->rank(0);
+ _input->size<DT>(1);
+ _input->at<DT>(0) = 1;
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->quantized_dimension = 0;
+ qparam->scale.push_back(1.0);
+ qparam->zerop.push_back(0);
+ _input->quantparam(std::move(qparam));
+ }
+
+ void createNotFoldablePattern() { _input->quantparam(nullptr); }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+ luci::CircleConst *_input = nullptr;
+};
+
+class S8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S8>
+{
+};
+
+class S16FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S16>
+{
+};
+
+class S32FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S32>
+{
+};
+
+class S64FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S64>
+{
+};
+
+class U8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::U8>
+{
+};
+
+class F16FoldDequantizeTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+public:
+ F16FoldDequantizeTest() : ConstantFoldingTestGraph({2, 2}, loco::DataType::FLOAT16) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ const auto DT = loco::DataType::FLOAT16;
+ _dequantize = _g.nodes()->create<luci::CircleDequantize>();
+ _f16const = _g.nodes()->create<luci::CircleConst>();
+
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _f16const->dtype(DT);
+
+ _f16const->shape({2, 2});
+
+ _f16const->size<loco::DataType::FLOAT16>(4);
+ _f16const->at<DT>(0) = 49408; // -2.5f
+ _f16const->at<DT>(1) = 47104; // -0.5f
+ _f16const->at<DT>(2) = 0; // 0.0f
+ _f16const->at<DT>(3) = 15872; // 1.5f
+ // NOTE how to get uint16_t value of float16 ?
+ // Use compiler/souschef/src/Gaussian.cpp GaussianFloat16DataChef::generate()
+ // uint16_t value = fp16_ieee_from_fp32_value(-2.5);
+ // printf("-2.5 = %u\r\n", value);
+
+ _dequantize->input(_f16const);
+
+ _dequantize->name("dequantize");
+ _f16const->name("input");
+
+ _output->from(_dequantize);
+
+ return _dequantize;
+ }
+
+ void createNotFoldablePattern() { _dequantize->input(_input); }
+
+protected:
+ luci::CircleConst *getFoldedPattern() override
+ {
+ return dynamic_cast<luci::CircleConst *>(_output->from());
+ }
+
+ void init() override { createFoldedPattern(); }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+ luci::CircleConst *_f16const = nullptr;
+};
+
+} // namespace
+
TEST(FoldDequantizePassTest, name)
{
luci::FoldDequantizePass pass;
auto const name = pass.name();
ASSERT_NE(nullptr, name);
}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S8FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S8FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S16FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S16FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S32FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S32FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S64FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S64FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_scalar)
+{
+ createScalarPattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(0, folded_const->rank());
+ EXPECT_EQ(1.0, folded_const->at<loco::DataType::FLOAT32>(0));
+}
+
+TEST_F(F16FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(-2.5, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(-0.5, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(1.5, folded_const->at<loco::DataType::FLOAT32>(3));
+}
+
+TEST_F(F16FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
diff --git a/compiler/luci/pass/src/FoldSparseToDensePass.cpp b/compiler/luci/pass/src/FoldSparseToDensePass.cpp
index 0c6fc43ed..ed60d8899 100644
--- a/compiler/luci/pass/src/FoldSparseToDensePass.cpp
+++ b/compiler/luci/pass/src/FoldSparseToDensePass.cpp
@@ -19,6 +19,8 @@
#include <luci/IR/CircleNodes.h>
+#include <limits>
+
namespace
{
diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
index 2c990f0a5..bc09abee2 100644
--- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
+++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
@@ -22,6 +22,7 @@
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Service/CircleShapeInference.h>
#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Service/CircleNodeClone.h>
namespace
{
@@ -55,6 +56,26 @@ void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape)
new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r);
}
+luci::CircleReshape *create_cloned_reshape(luci::CircleReshape *reshape)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+
+ luci::CircleConst *cloned_shape = clone_shape(reshape);
+ if (cloned_shape == nullptr)
+ return nullptr;
+
+ auto cloned_node = luci::clone_node(reshape, reshape->graph());
+ if (cloned_node == nullptr)
+ return nullptr;
+
+ auto new_reshape = loco::must_cast<luci::CircleReshape *>(cloned_node);
+ new_reshape->shape(cloned_shape);
+ new_reshape->name(reshape->name() + "_C");
+ luci::add_origin(new_reshape, luci::get_origin(reshape));
+
+ return new_reshape;
+}
+
bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
{
assert(reshape != nullptr);
@@ -85,6 +106,26 @@ bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
return true;
}
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleLogistic *logit)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+ assert(logit != nullptr); // FIX_CALLER_UNLESS
+
+ auto new_reshape = create_cloned_reshape(reshape);
+ if (not new_reshape)
+ return false;
+
+ // reconnect network
+ loco::replace(logit).with(new_reshape);
+ logit->x(reshape->tensor());
+ new_reshape->tensor(logit);
+
+ // Do shape inference for this node again.
+ logit->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
class ForwardReshape final : public luci::CircleNodeMutableVisitor<bool>
{
protected:
@@ -103,6 +144,14 @@ protected:
return forward_reshape(reshape, node);
}
+ bool visit(luci::CircleLogistic *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+
+ return forward_reshape(reshape, node);
+ }
// TODO add more unary operators
};
diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
index 2593a014c..373513270 100644
--- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
+++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
@@ -65,6 +65,42 @@ protected:
luci::CircleConst *_reshape_shape = nullptr;
};
+// TODO Reduce duplicate code with ReshapeNegGraphlet
+class ReshapeLogisticGraphlet
+{
+public:
+ ReshapeLogisticGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ std::vector<uint32_t> shape_out_v = shape_out;
+
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _logistic = g->nodes()->create<luci::CircleLogistic>();
+
+ _reshape_shape->dtype(loco::DataType::S32);
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(shape_out_v.size());
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ // values
+ const auto size = shape_out_v.size();
+ _reshape_shape->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i];
+
+ _reshape_shape->name("reshape_shape");
+ _reshape->name("reshape");
+ _logistic->name("logistic");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleLogistic *_logistic = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet
{
public:
@@ -85,6 +121,26 @@ public:
}
};
+class ForwardReshapeToLogisticGraph : public TestIOGraph, public ReshapeLogisticGraphlet
+{
+public:
+ ForwardReshapeToLogisticGraph() = default;
+
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ ReshapeLogisticGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+ _logistic->x(_reshape);
+
+ output()->from(_logistic);
+ }
+};
+
class ForwardReshapeToNegGraphTest : public ::testing::Test
{
public:
@@ -101,6 +157,22 @@ protected:
luci::ForwardReshapeToUnaryOpPass _pass;
};
+class ForwardReshapeToLogisticGraphTest : public ::testing::Test
+{
+public:
+ ForwardReshapeToLogisticGraphTest() = default;
+
+ void run_pass(void)
+ {
+ while (_pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ ForwardReshapeToLogisticGraph _graph;
+ luci::ForwardReshapeToUnaryOpPass _pass;
+};
+
} // namespace
TEST(ForwardReshapeToUnaryOpPassTest, name)
@@ -123,3 +195,17 @@ TEST_F(ForwardReshapeToNegGraphTest, simple_forward)
neg = dynamic_cast<luci::CircleNeg *>(reshape->tensor());
ASSERT_NE(nullptr, neg);
}
+
+TEST_F(ForwardReshapeToLogisticGraphTest, forward)
+{
+ _graph.init({2, 2, 2}, {2, 4});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto log = dynamic_cast<luci::CircleLogistic *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, log);
+ log = dynamic_cast<luci::CircleLogistic *>(reshape->tensor());
+ ASSERT_NE(nullptr, log);
+}
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
index 97a962cb6..3cf31ed10 100644
--- a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
+++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
@@ -99,6 +99,12 @@ bool fuse_add_with_fc(luci::CircleFullyConnected *fc)
fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i);
}
+ // At this point, it is guaranteed that fused_bias's shape is [1, 1, ..., N] or [N]
+ // where N is weights->dim(0).
+ // The shape is normalized to [N] to become the bias of FC
+ fused_bias->rank(1);
+ fused_bias->dim(0) = weights->dim(0);
+
fc->bias(fused_bias);
fc->fusedActivationFunction(add->fusedActivationFunction());
diff --git a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
index 2bca57014..852bc8b63 100644
--- a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
+++ b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
@@ -37,10 +37,10 @@ namespace
* \ |
* [CircleTransposeConv] [CircleAdd]
* |
- * ([CircleRelu6])
+ * ([CircleRelu/Relu6])
* |
*
- * Note: CircleRelu6 is inserted if Add activation is ReLU6
+ * Note: CircleRelu/Relu6 is inserted if Add activation is ReLU/ReLU6
*/
bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
{
@@ -65,7 +65,8 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
if (add->dtype() != loco::DataType::FLOAT32)
return false;
if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
- add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6 &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU)
return false;
// get addition
@@ -102,6 +103,19 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
// remove add node
replace(add).with(relu);
}
+ else if (add->fusedActivationFunction() == luci::FusedActFunc::RELU)
+ {
+ auto name = addition->name();
+ assert(name.length() > 0);
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu>();
+ relu->features(tconv);
+ relu->name(name + "/Relu");
+ luci::add_origin(relu, luci::get_origin(add));
+
+ // remove add node
+ replace(add).with(relu);
+ }
else
{
replace(add).with(tconv);
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
index 337954960..e6b54df36 100644
--- a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
+++ b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
@@ -29,7 +29,7 @@ namespace
* NOTE TF's BatchNormalization is converted to Mul and Add.
*
* BEFORE
- * | [CircleOutputExclude]
+ * | [CircleConst]/[CircleOutputExclude]
* | / [CircleConst]
* | / /
* [CircleTransposeConv] [CircleConst]
@@ -40,7 +40,7 @@ namespace
* |
*
* AFTER
- * | [CircleOutputExclude]
+ * | [CircleConst]/[CircleOutputExclude]
* +-------------------------------------+ / [CircleConst]
* | | / /
* | [CircleTransposeConv] [CircleConst]
@@ -69,9 +69,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
return false;
// check scale and shift constant attributes
- if (scale->rank() != 1)
+ // TODO maybe rank check is not needed
+ if (scale->rank() != 1 && scale->rank() != 4)
return false;
- if (shift->rank() != 1)
+ if (shift->rank() != 1 && shift->rank() != 4)
return false;
// check mul, add attributes
if (mul->dtype() != loco::DataType::FLOAT32)
@@ -82,9 +83,8 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
return false;
- // tconv bias should be not set
- if (not dynamic_cast<luci::CircleOutputExclude *>(tconv->bias()))
- return false;
+ // tconv bias is optional
+ auto bias = dynamic_cast<luci::CircleConst *>(tconv->bias());
// get weight of tconv
auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
@@ -96,10 +96,36 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
return false;
auto filter_out_chn = filter->dim(0).value();
- if (filter_out_chn != scale->dim(0).value())
+ // allow scale/shift and bias shape of [N], [1,1,1,N]; BN works for "channel-wise"
+ auto srank = scale->rank() - 1;
+ if (filter_out_chn != scale->dim(srank).value())
return false;
- if (filter_out_chn != shift->dim(0).value())
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != scale->dim(d).value())
+ return false;
+ }
+ srank = shift->rank() - 1;
+ if (filter_out_chn != shift->dim(srank).value())
return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != shift->dim(d).value())
+ return false;
+ }
+ if (bias)
+ {
+ if (bias->dtype() != loco::DataType::FLOAT32)
+ return false;
+ srank = bias->rank() - 1;
+ if (filter_out_chn != bias->dim(srank).value())
+ return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != bias->dim(d).value())
+ return false;
+ }
+ }
auto name = add->name();
assert(name.length() > 0);
@@ -151,6 +177,11 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
for (uint32_t c = 0; c < filter_out_chn; ++c)
{
fused_bias->at<loco::DataType::FLOAT32>(c) = shift->at<loco::DataType::FLOAT32>(c);
+ if (bias != nullptr)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) +=
+ bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c);
+ }
}
fused_bias->name(name + "/TransposeConv/bias");
@@ -166,6 +197,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
luci::add_origin(fused_tconv,
luci::composite_origin(
{luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)}));
+ if (bias != nullptr)
+ {
+ luci::add_origin(fused_tconv, luci::get_origin(bias));
+ }
if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
{
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
index f3ec6cd9e..10a651e35 100644
--- a/compiler/luci/pass/src/FuseInstanceNormPass.cpp
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
@@ -325,6 +325,10 @@ public:
}
private:
+ bool condition_common_1_5(uint32_t ifm_channel_depth);
+ bool condition_common_3_4();
+
+private:
template <enum PatternVersion> bool match();
public:
@@ -368,21 +372,8 @@ private:
if (not(condition)) \
return false;
-template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>()
+bool InstanceNormPattern::condition_common_1_5(uint32_t ifm_channel_depth)
{
- CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
-
- auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
- CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
- CHECK_OR_FALSE(ifm_circle->rank() == 4);
- CHECK_OR_FALSE(ifm_circle->dim(3).known());
- uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
-
- CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
-
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
-
add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
CHECK_OR_FALSE(add_as_variance);
@@ -408,6 +399,70 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(const_as_beta);
CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+ return true;
+}
+
+bool InstanceNormPattern::condition_common_3_4()
+{
+ // check left sub
+ ifm = sub->x();
+ CHECK_OR_FALSE(ifm);
+
+ luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_node->rank() == 4);
+ CHECK_OR_FALSE(ifm_node->dim(3).known());
+
+ mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
+ CHECK_OR_FALSE(mean_of_ifm);
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+
+ // continue search from add_as_variance
+ CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
+ // TODO Support regarding broadcast
+ CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
+
+ mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
+ CHECK_OR_FALSE(mean_as_variance);
+
+ square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
+ CHECK_OR_FALSE(square);
+
+ sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
+ CHECK_OR_FALSE(sub_2);
+ CHECK_OR_FALSE(ifm == sub_2->x());
+
+ mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
+ CHECK_OR_FALSE(mean_of_ifm_2);
+ CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
+
+ loco::Node *ifm_should_be = nullptr;
+ luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
+ CHECK_OR_FALSE(
+ luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+
+ return true;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>()
+{
+ CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+
+ auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
+ CHECK_OR_FALSE(ifm_circle->rank() == 4);
+ CHECK_OR_FALSE(ifm_circle->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
+
+ CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+
+ CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth));
+
luci::CircleMul *mul_gamma_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
@@ -488,44 +543,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div));
- // check left sub
- ifm = sub->x();
- CHECK_OR_FALSE(ifm);
-
- luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
- CHECK_OR_FALSE(ifm_node->rank() == 4);
- CHECK_OR_FALSE(ifm_node->dim(3).known());
-
- mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
- CHECK_OR_FALSE(mean_of_ifm);
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
-
- // continue search from add_as_variance
- CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
- CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
- // TODO Support regarding broadcast
- CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
-
- mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
- CHECK_OR_FALSE(mean_as_variance);
-
- square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
- CHECK_OR_FALSE(square);
-
- sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
- CHECK_OR_FALSE(sub_2);
- CHECK_OR_FALSE(ifm == sub_2->x());
-
- mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
- CHECK_OR_FALSE(mean_of_ifm_2);
- CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
-
- loco::Node *ifm_should_be = nullptr;
- luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
- CHECK_OR_FALSE(
- luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+ CHECK_OR_FALSE(condition_common_3_4());
_matched = true;
return true;
@@ -546,44 +564,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(div);
CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div));
- // check left sub
- ifm = sub->x();
- CHECK_OR_FALSE(ifm);
-
- luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
- CHECK_OR_FALSE(ifm_node->rank() == 4);
- CHECK_OR_FALSE(ifm_node->dim(3).known());
-
- mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
- CHECK_OR_FALSE(mean_of_ifm);
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
-
- // continue search from add_as_variance
- CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
- CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
- // TODO Support regarding broadcast
- CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
-
- mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
- CHECK_OR_FALSE(mean_as_variance);
-
- square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
- CHECK_OR_FALSE(square);
-
- sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
- CHECK_OR_FALSE(sub_2);
- CHECK_OR_FALSE(ifm == sub_2->x());
-
- mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
- CHECK_OR_FALSE(mean_of_ifm_2);
- CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
-
- loco::Node *ifm_should_be = nullptr;
- luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
- CHECK_OR_FALSE(
- luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+ CHECK_OR_FALSE(condition_common_3_4());
assert(const_as_gamma == nullptr);
assert(const_as_beta == nullptr);
@@ -612,30 +593,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(ifm_circle->dim(3).known());
uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
- add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
- CHECK_OR_FALSE(add_as_variance);
-
- CHECK_OR_FALSE(
- luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
-
- CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
- // TODO Support regarding broadcast
- CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
-
- CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance));
-
- sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input());
- CHECK_OR_FALSE(sqdiff);
-
- loco::Node *ifm_should_be = nullptr;
- CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(is_instance_mean_v1(mean_of_ifm));
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
-
- const_as_beta = dynamic_cast<luci::CircleConst *>(sub->x());
- CHECK_OR_FALSE(const_as_beta);
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+ CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth));
luci::CircleRsqrt *rsqrt_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
index b4975486d..e8fa2a478 100644
--- a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
+++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
@@ -23,6 +23,7 @@
#include <luci/Log.h>
#include <cmath>
+#include <limits>
namespace
{
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
index 003e4c293..aaadb2864 100644
--- a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
+++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
@@ -138,13 +138,18 @@ struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool
auto qtype = luci::activation_qtype(input_node);
switch (qtype)
{
- case luci::ActivationQType::PreDefinedValue:
- node->quantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype()));
+ case luci::ActivationQType::PreDefinedLogistic:
+ case luci::ActivationQType::PreDefinedTanh:
+ case luci::ActivationQType::PreDefinedSoftmax:
+ node->quantparam(luci::make_predefined_qparam(qtype, node->dtype()));
break;
case luci::ActivationQType::IntScale:
luci::set_int_scale(node);
break;
default:
+ // This assert ensures this switch-statement handles all ActivationQTypes
+ // TODO Find a better design to remove coupling with ActivationQType
+ assert(qtype == luci::ActivationQType::MinMax);
break;
}
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
index ad86cedf4..06a4ae9f6 100644
--- a/compiler/luci/pass/src/QuantizationUtils.cpp
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -20,6 +20,7 @@
#include <iostream>
#include <cmath>
+#include <limits>
namespace luci
{
@@ -276,31 +277,70 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
indices[2] * dimension.dim(3).value() + indices[3];
}
+// Activation (ofm) qtype is determined in different ways.
+// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH)
+// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL)
+// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE,
+// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details).
ActivationQType activation_qtype(const CircleNode *node)
{
auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
- return ActivationQType::PreDefinedValue;
+ return ActivationQType::PreDefinedTanh;
+
+#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT) \
+ { \
+ auto n = loco::must_cast<const CLASS *>(node); \
+ auto input = loco::must_cast<CircleNode *>(n->INPUT()); \
+ return activation_qtype(input); \
+ }
switch (node->opcode())
{
case CircleOpcode::LOGISTIC:
+ return ActivationQType::PreDefinedLogistic;
case CircleOpcode::TANH:
+ return ActivationQType::PreDefinedTanh;
case CircleOpcode::SOFTMAX:
- return ActivationQType::PreDefinedValue;
+ return ActivationQType::PreDefinedSoftmax;
case CircleOpcode::FLOOR:
case CircleOpcode::FLOOR_DIV:
case CircleOpcode::FLOOR_MOD:
case CircleOpcode::CEIL:
return ActivationQType::IntScale;
+ case CircleOpcode::GATHER:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params);
+ case CircleOpcode::RESHAPE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor);
+ case CircleOpcode::TRANSPOSE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a);
+ case CircleOpcode::STRIDED_SLICE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input);
+ case CircleOpcode::SPLIT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input);
+ case CircleOpcode::CIRCLESPLITOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input);
+ case CircleOpcode::SPLIT_V:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input);
+ case CircleOpcode::CIRCLESPLITVOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input);
+ case CircleOpcode::UNPACK:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value);
+ case CircleOpcode::CIRCLEUNPACKOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input);
+ case CircleOpcode::QUANTIZE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input);
default:
break;
}
+#undef RETURN_INPUT_ACTIVATION_QTYPE
+
return ActivationQType::MinMax;
}
-std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype)
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
+ loco::DataType dtype)
{
auto qparam = std::make_unique<CircleQuantParam>();
@@ -309,9 +349,9 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
qparam->zerop.emplace_back(zp);
};
- switch (opcode)
+ switch (qtype)
{
- case CircleOpcode::LOGISTIC:
+ case ActivationQType::PreDefinedLogistic:
if (dtype == loco::DataType::U8)
set_qparam(1.0f / 256.0f, 0);
else
@@ -320,7 +360,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
set_qparam(1.0f / 32768.0f, 0);
}
break;
- case CircleOpcode::TANH:
+ case ActivationQType::PreDefinedTanh:
if (dtype == loco::DataType::U8)
set_qparam(2.0f / 256.0f, 128);
else
@@ -329,7 +369,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
set_qparam(1.0f / 32768.0f, 0);
}
break;
- case CircleOpcode::SOFTMAX:
+ case ActivationQType::PreDefinedSoftmax:
if (dtype == loco::DataType::U8)
set_qparam(1.0f / 255.0f, 0);
else
@@ -341,7 +381,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
default:
throw std::runtime_error("Unsupported opcode with pre-defined qparam");
}
- return std::move(qparam);
+ return qparam;
}
// For nodes with integer output, we use integer scale
@@ -395,4 +435,74 @@ void quant_const(luci::CircleConst *node, loco::DataType quant_type)
node->quantparam(std::move(quantparam));
}
+namespace
+{
+
+// TODO move this to a more global helper file
+int nbits(loco::DataType dt) noexcept
+{
+ switch (dt)
+ {
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ return 8;
+ case loco::DataType::S16:
+ case loco::DataType::U16:
+ case loco::DataType::FLOAT16:
+ return 16;
+ case loco::DataType::S32:
+ case loco::DataType::U32:
+ case loco::DataType::FLOAT32:
+ return 32;
+ case loco::DataType::S64:
+ return 64;
+ default:
+ return 64; // a safe large default
+ }
+}
+
+// TODO Check if the metric is valid
+// Returns true if [min,max] is poorly representable
+bool range_check(float min, float max, loco::DataType dtype)
+{
+ float thresh = 1.5f;
+ return log2f(max) - log2f(min) > nbits(dtype) * thresh;
+}
+
+bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n)
+{
+ float min, max;
+ // estimate min/max
+ switch (n->dtype())
+ {
+ case loco::DataType::U8:
+ min = scale * (0 - zp);
+ max = scale * (255 - zp);
+ break;
+ case loco::DataType::S16:
+ min = scale * (-32767);
+ max = scale * (32767);
+ break;
+ default:
+ return false;
+ }
+ return range_check(min, max, n->dtype());
+}
+
+} // namespace
+
+void warn_accuracy_with_range(luci::CircleNode *n)
+{
+ LOGGER(l);
+ auto qp = n->quantparam();
+ auto k = qp->zerop.size();
+ for (uint32_t i = 0; i < k; i++)
+ {
+ if (warn_scale_zp(qp->scale[i], qp->zerop[i], n))
+ WARN(l) << "Quantization of " << i << "-th channel of " << n->name()
+ << "'s quantization may cause accuracy issues" << std::endl;
+ ;
+ }
+}
+
} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h
index cd8cec95a..4d5316ccb 100644
--- a/compiler/luci/pass/src/QuantizationUtils.h
+++ b/compiler/luci/pass/src/QuantizationUtils.h
@@ -62,15 +62,19 @@ bool is_quantized(const CircleNode *node);
enum ActivationQType
{
- MinMax, // Quantize using recorded min/max
- PreDefinedValue, // Quantize using pre-defined values
- IntScale, // Round scale to a positive integer
+ MinMax, // Quantize using recorded min/max
+ PreDefinedLogistic, // Quantize using pre-defined values
+ PreDefinedTanh, // Quantize using pre-defined values
+ PreDefinedSoftmax, // Quantize using pre-defined values
+ IntScale, // Round scale to a positive integer
};
ActivationQType activation_qtype(const CircleNode *node);
// Create qparam with pre-defined values for speical operators
-std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype);
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleNode *node, loco::DataType dtype);
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
+ loco::DataType dtype);
// Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil)
void set_int_scale(luci::CircleNode *node);
@@ -78,6 +82,10 @@ void set_int_scale(luci::CircleNode *node);
// Quantize const tensor using its min/max values
void quant_const(luci::CircleConst *node, loco::DataType quant_type);
+// Check that a node is quantized without significant loss of precision;
+// Emits warnings to log with WARN
+void warn_accuracy_with_range(luci::CircleNode *n);
+
} // namespace luci
#endif // __LUCI_QUANTIZATION_UTILS_H__
diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp
index 149331824..95251a82c 100644
--- a/compiler/luci/pass/src/QuantizeActivation.cpp
+++ b/compiler/luci/pass/src/QuantizeActivation.cpp
@@ -114,29 +114,26 @@ void QuantizeSpecialActivation::visit(luci::CircleNode *node)
auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
{
- auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type);
node->quantparam(std::move(qparam));
}
}
void QuantizeSpecialActivation::visit(luci::CircleLogistic *node)
{
- assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
- auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedLogistic, output_type);
node->quantparam(std::move(qparam));
}
void QuantizeSpecialActivation::visit(luci::CircleTanh *node)
{
- assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
- auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type);
node->quantparam(std::move(qparam));
}
void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node)
{
- assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
- auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedSoftmax, output_type);
node->quantparam(std::move(qparam));
}
diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp
index aa496232a..de97a14dd 100644
--- a/compiler/luci/pass/src/QuantizeBias.cpp
+++ b/compiler/luci/pass/src/QuantizeBias.cpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include <cmath>
+#include <limits>
using namespace luci;
@@ -201,6 +202,18 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w
std::vector<float> scaling_factor(size);
std::vector<int64_t> zp(size);
+ if (const_bias->rank() == 0)
+ {
+ // TODO Support quantization of scalar bias
+ throw std::runtime_error("Quantization of scalar bias is not yet supported (" +
+ const_bias->name() + ")");
+ }
+ if (size != const_bias->dim(const_bias->rank() - 1).value())
+ {
+ throw std::runtime_error(const_bias->name() +
+ " (bias) should have the shape of [1, 1, .. 1, channel]");
+ }
+
if (output_type == loco::DataType::U8)
{
new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
@@ -218,6 +231,7 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w
auto quantparam = std::make_unique<CircleQuantParam>();
quantparam->scale = scaling_factor;
quantparam->zerop = zp;
+ quantparam->quantized_dimension = const_bias->rank() - 1;
assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
new_bias->quantparam(std::move(quantparam));
diff --git a/compiler/luci/pass/src/QuantizeBias.test.cpp b/compiler/luci/pass/src/QuantizeBias.test.cpp
new file mode 100644
index 000000000..0104a191b
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.test.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeBias.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleQuantParam.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+
+namespace
+{
+
+using namespace luci::test;
+
+// TODO Reduce duplicate code in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape, T value)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < size; ++i) \
+ node->at<DT>(i) = value; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM] [WEIGHTS] [BIAS(FP32)]
+ * \ | /
+ * [FC]
+ * |
+ * [OFM]
+ *
+ * AFTER
+ *
+ * [IFM] [WEIGHTS] [BIAS(Quantized)]
+ * \ | /
+ * [FC]
+ * |
+ * [OFM]
+ */
+struct Q8FCGraphlet
+{
+public:
+ Q8FCGraphlet() = default;
+ virtual ~Q8FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 out_shape, const ShapeU32 w_shape,
+ const ShapeU32 bias_shape, const float bv)
+ {
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(_x);
+ _x->dtype(loco::DataType::U8);
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _x->quantparam(std::move(quantparam));
+ }
+
+ auto weights = create_const_node<uint8_t>(g, loco::DataType::U8, w_shape, 1.0);
+ auto w_qparam = std::make_unique<CircleQuantParam>();
+ std::vector<float> w_scale(weights->dim(0).value(), 1.0);
+ std::vector<int64_t> w_zp(weights->dim(0).value(), 0);
+ w_qparam->scale = w_scale;
+ w_qparam->zerop = w_zp;
+ w_qparam->quantized_dimension = 0;
+ weights->quantparam(std::move(w_qparam));
+ _fc->weights(weights);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::U8);
+ _fc->shape(out_shape);
+ auto l = _fc->dim(_fc->rank() - 1).value();
+ _fc->bias(create_const_node(g, loco::DataType::FLOAT32, bias_shape, bv));
+ _fc->name("fc");
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _fc->quantparam(std::move(quantparam));
+ }
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleInput *_x = nullptr;
+};
+
+struct Q8FCGraph final : public TestIGraphlet, public TestOGraphlet, public Q8FCGraphlet
+{
+ void init(const ShapeU32 in_shape, const ShapeU32 w_shape, const ShapeU32 out_shape,
+ const ShapeU32 bias_shape, const float bv)
+ {
+ TestIGraphlet::init(g(), in_shape);
+ TestOGraphlet::init(g(), out_shape);
+ _x = input();
+ Q8FCGraphlet::init(g(), out_shape, w_shape, bias_shape, bv);
+ output()->from(_fc);
+ }
+};
+
+class CQ8QuantizeBiasFCTest : public ::testing::Test
+{
+public:
+ Q8FCGraph g;
+ luci::QuantizeBias qb{loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::ChannelWise};
+};
+
+} // namespace
+
+TEST_F(CQ8QuantizeBiasFCTest, fully_connected)
+{
+ g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 256}, 1);
+ g.fc()->accept(&qb);
+
+ auto bias = loco::must_cast<CircleConst *>(g.fc()->bias());
+ auto qparam = bias->quantparam();
+
+ EXPECT_NE(nullptr, qparam);
+ EXPECT_EQ(256, qparam->scale.size());
+ EXPECT_EQ(256, qparam->zerop.size());
+ EXPECT_EQ(1, qparam->quantized_dimension);
+}
+
+TEST_F(CQ8QuantizeBiasFCTest, wrong_bias_shape_NEG)
+{
+ g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 2, 128}, 1);
+ EXPECT_ANY_THROW(g.fc()->accept(&qb)); // Wrong bias shape
+}
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
index c9b35e0be..ef047d35d 100644
--- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
@@ -27,6 +27,7 @@
#include <iostream>
#include <cmath>
#include <functional>
+#include <limits>
namespace
{
@@ -352,15 +353,15 @@ private:
private:
// Check if
// 1. node is const
- // 2. node was not quantized
+ // 2. node's dtype is float32
bool is_quantizable(loco::Node *node)
{
auto const_node = dynamic_cast<luci::CircleConst *>(node);
if (not const_node)
return false;
- // Skip if this is already quantized
- if (is_quantized(const_node))
+ // Skip if this is not float32
+ if (const_node->dtype() != loco::DataType::FLOAT32)
return false;
return true;
diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp
index 11322ab44..500ae12ed 100644
--- a/compiler/luci/pass/src/QuantizeWeights.cpp
+++ b/compiler/luci/pass/src/QuantizeWeights.cpp
@@ -23,6 +23,7 @@
#include <cmath>
#include <vector>
#include <functional>
+#include <limits>
using namespace luci;
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index d9a9d4db7..005144516 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -41,10 +41,28 @@ namespace
{
using namespace luci;
+
+bool use_predefined_values(ActivationQType qtype)
+{
+ switch (qtype)
+ {
+ case ActivationQType::PreDefinedLogistic:
+ case ActivationQType::PreDefinedTanh:
+ case ActivationQType::PreDefinedSoftmax:
+ return true;
+ default:
+ // This ensures this switch-statement handles all ActivationQTypes
+ assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax);
+ break;
+ }
+
+ return false;
+}
+
// Create a Quantize Op whose
// dtype is out_type
// shape is the same with node
-// qparam is computed using node's min/max
+// qparam is computed according to node's qtype
luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
{
auto quantize = node->graph()->nodes()->create<CircleQuantize>();
@@ -60,9 +78,9 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
assert(qparam); // FIX_CALLER_UNLESS
auto qtype = luci::activation_qtype(node);
- if (qtype == ActivationQType::PreDefinedValue)
+ if (use_predefined_values(qtype))
{
- quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type));
+ quantize->quantparam(luci::make_predefined_qparam(qtype, out_type));
return quantize;
}
@@ -105,6 +123,23 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
return quantize;
}
+// Create Dequantize Op whose shape is the same with node
+luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
+{
+ auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
+ dequantize->name(node->name() + "_Dequantize");
+ dequantize->dtype(loco::DataType::FLOAT32);
+ dequantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ dequantize->dim(i).set(node->dim(i).value());
+
+ dequantize->shape_status(luci::ShapeStatus::VALID);
+
+ luci::add_origin(dequantize, luci::get_origin(node));
+
+ return dequantize;
+}
+
} // namespace
namespace luci
@@ -229,11 +264,13 @@ private:
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
@@ -241,6 +278,7 @@ private:
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
@@ -250,6 +288,7 @@ private:
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
@@ -353,7 +392,9 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
luci::add_origin(quant_op, luci::get_origin(succ));
}
- // Requantize input
+ // Update qparam of input
+ // This step is skipped if input_type is float32
+ if (_ctx->input_type != loco::DataType::FLOAT32)
{
auto quantparam = input->quantparam();
assert(quantparam);
@@ -376,11 +417,13 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
assert(_ctx->input_type == loco::DataType::S16);
compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
- input->dtype(_ctx->input_type);
input->quantparam()->scale[0] = scaling_factor;
input->quantparam()->zerop[0] = zp;
}
+ // Update dtype of input
+ input->dtype(_ctx->input_type);
+
auto graph_input = inputs->at(input->index());
graph_input->dtype(_ctx->input_type);
}
@@ -405,13 +448,26 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
if (not from->quantparam())
continue;
- // Insert Quantize Op
- auto quant_op = create_quantize_op(from, _ctx->output_type);
- loco::replace(from).with(quant_op);
- quant_op->input(from);
+ // Insert Dequantize Op for float32 output_type
+ if (_ctx->output_type == loco::DataType::FLOAT32)
+ {
+ auto dequant_op = create_dequantize(from);
+ loco::replace(from).with(dequant_op);
+ dequant_op->input(from);
+ }
+ else
+ {
+ // Insert Quantize Op for non-float32 output_type
+ auto quant_op = create_quantize_op(from, _ctx->output_type);
+ loco::replace(from).with(quant_op);
+ quant_op->input(from);
- // TODO Set a proper origin (Quantize should have its own Origin)
- luci::add_origin(quant_op, luci::get_origin(from));
+ // TODO Set a proper origin (Quantize should have its own Origin)
+ luci::add_origin(quant_op, luci::get_origin(from));
+ }
+
+ // Update dtype of output
+ output->dtype(_ctx->output_type);
auto graph_output = outputs->at(output->index());
graph_output->dtype(_ctx->output_type);
@@ -594,12 +650,25 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
// Set output type
set_output_type(g);
+ // Remove redundant Quantize Op
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+
// Remove min/max values
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
if (auto qparam = circle_node->quantparam())
{
+ warn_accuracy_with_range(circle_node);
qparam->min.clear();
qparam->max.clear();
}
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
index cebafd32b..21b4fe1c6 100644
--- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
@@ -1088,6 +1088,31 @@ private:
luci::CircleConst *_const = nullptr;
};
+class ReduceMaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({4, 3, 2}, {2});
+
+ _axis = create_const<Type::S32, int32_t>(g(), {4}, {1, 0, -3, -3});
+ _reduce_max = g()->nodes()->create<luci::CircleReduceMax>();
+ {
+ _reduce_max->input(input());
+ _reduce_max->reduction_indices(_axis);
+ _reduce_max->name("test");
+ _reduce_max->keep_dims(false);
+ }
+ output()->from(_reduce_max);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleReduceMax *_reduce_max = nullptr;
+ luci::CircleConst *_axis = nullptr;
+};
+
class ResizeBilinearTestGraph final : public SimpleTestGraph
{
public:
@@ -2345,6 +2370,34 @@ TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG)
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, ReduceMax)
+{
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
TEST(QuantizedModelVerifierTest, ResizeBilinear)
{
TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp
new file mode 100644
index 000000000..66cd9d791
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_redundant_dequant(luci::CircleDequantize *dequant)
+{
+ assert(dequant != nullptr);
+
+ auto prev = loco::must_cast<luci::CircleNode *>(dequant->input());
+ if (prev->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ replace(dequant).with(prev);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * Dequantize Op does the below things on the ifm.
+ * 1. Element-wise update of quantized values (u8/s16) to fp32 values
+ * 2. Update dtype to fp32
+ * If the previous node is not quantized, dequantize Op is redundant.
+ *
+ * BEFORE
+ *
+ * [CircleNode (A)]
+ * |
+ * [CircleNode (B)] (fp32)
+ * |
+ * [CircleDequantize]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode (A)]
+ * |
+ * [CircleNode (B)] (fp32)
+ * |
+ * [CircleNode]
+ */
+bool RemoveRedundantDequantizePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleDequantize *>(node);
+ if (target_node != nullptr)
+ {
+ if (remove_redundant_dequant(target_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp
new file mode 100644
index 000000000..adb2f14a4
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class DequantizeGraphlet
+{
+public:
+ DequantizeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _dequantize = g->nodes()->create<luci::CircleDequantize>();
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _dequantize->name("dequantize");
+ }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+};
+
+class RedundantDequantizeGraph : public TestIOGraph, public DequantizeGraphlet
+{
+public:
+ RedundantDequantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ DequantizeGraphlet::init(g());
+
+ _dequantize->input(input());
+
+ output()->from(_dequantize);
+ }
+
+ void init_u8_input(void)
+ {
+ TestIOGraph::init({1}, {1});
+ DequantizeGraphlet::init(g());
+
+ // Use u8 input (dequantize is not redundant anymore)
+ input()->dtype(loco::DataType::U8);
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1};
+ qparam->zerop = {1};
+ input()->quantparam(std::move(qparam));
+ }
+
+ _dequantize->input(input());
+
+ output()->from(_dequantize);
+ }
+};
+
+} // namespace
+
+TEST(RemoveRedundantDequantizePass, single_redundant_dequantize)
+{
+ RedundantDequantizeGraph g;
+ luci::RemoveRedundantDequantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleDequantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(0, count);
+}
+
+TEST(RemoveRedundantDequantizePass, wrong_dtype_NEG)
+{
+ RedundantDequantizeGraph g;
+ luci::RemoveRedundantDequantizePass pass;
+
+ g.init_u8_input();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp
new file mode 100644
index 000000000..476ec68bf
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool acceptable_intermediate_op(const loco::Node *node)
+{
+ if (not node)
+ return false;
+
+ const auto opcode = loco::must_cast<const luci::CircleNode *>(node)->opcode();
+
+ switch (opcode)
+ {
+ case luci::CircleOpcode::ADD:
+ case luci::CircleOpcode::MUL:
+ case luci::CircleOpcode::TANH:
+ case luci::CircleOpcode::LOGISTIC:
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool same_shape(const loco::Node *a, const loco::Node *b)
+{
+ auto a_cnode = loco::must_cast<const luci::CircleNode *>(a);
+ auto b_cnode = loco::must_cast<const luci::CircleNode *>(b);
+
+ if (a_cnode->rank() != b_cnode->rank())
+ return false;
+
+ for (uint32_t i = 0; i < a_cnode->rank(); i++)
+ {
+ if (not(a_cnode->dim(i) == b_cnode->dim(i)))
+ return false;
+ }
+ return true;
+}
+
+class PreReshapeFinder
+{
+public:
+ PreReshapeFinder(const luci::CircleReshape *post_reshape) : _post_reshape(post_reshape)
+ {
+ assert(post_reshape != nullptr); // FIX_CALLER_UNLESS
+ }
+
+public:
+ // Return true if pre_reshapes are found
+ bool collect_pre_reshapes(loco::Node *node)
+ {
+ // TODO Support diamond case
+ if (loco::succs(node).size() != 1)
+ return false;
+
+ if (auto pre_reshape = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ // Check ifm of pre-reshape and ofm of post_reshape
+ if (not same_shape(pre_reshape->tensor(), _post_reshape))
+ return false;
+
+ // Check ofm of pre-reshape and ifm of post_reshape
+ if (not same_shape(pre_reshape, _post_reshape->tensor()))
+ return false;
+
+ _pre_reshapes.emplace_back(pre_reshape);
+ return true;
+ }
+
+ if (not acceptable_intermediate_op(node))
+ return false;
+
+ for (uint32_t i = 0; i < node->arity(); i++)
+ {
+ if (not collect_pre_reshapes(node->arg(i)))
+ return false;
+ }
+
+ return true;
+ }
+
+public:
+ std::vector<luci::CircleReshape *> pre_reshapes(void) const { return _pre_reshapes; }
+
+private:
+ const luci::CircleReshape *_post_reshape = nullptr;
+ std::vector<luci::CircleReshape *> _pre_reshapes;
+};
+
+bool remove_unnecessary_reshape_net(luci::CircleReshape *reshape)
+{
+ PreReshapeFinder finder(reshape);
+ if (not finder.collect_pre_reshapes(reshape->tensor()))
+ return false;
+
+ // Remove pre_reshapes
+ for (auto pre_reshape : finder.pre_reshapes())
+ {
+ loco::replace(pre_reshape).with(pre_reshape->tensor());
+ }
+
+ // Remove post_reshape
+ loco::replace(reshape).with(reshape->tensor());
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleReshape_1] (shape: A -> B)
+ * |
+ * [CircleNode] (ex: Add/Mul/Tanh/Logistic ..)
+ * |
+ * [CircleReshape_2] (shape: B -> A)
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * | \
+ * | [CircleReshape_1]
+ * [CircleNode]
+ * | \
+ * | [CircleReshape_2]
+ * [CircleNode]
+ **/
+bool RemoveUnnecessaryReshapeNetPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ if (remove_unnecessary_reshape_net(reshape_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp
new file mode 100644
index 000000000..4ad707ba3
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class RemoveUnnecessaryReshapeNet : public ::testing::Test
+{
+public:
+ RemoveUnnecessaryReshapeNet() {}
+
+ void createReshapeConst(luci::CircleReshape *target, const std::vector<uint32_t> shape)
+ {
+ auto shape_const = g.nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->size<loco::DataType::S32>(shape.size());
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->rank(1);
+ shape_const->dim(0).set(shape.size());
+ for (int32_t i = 0; i < shape.size(); i++)
+ {
+ shape_const->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape.at(i));
+ }
+ shape_const->name("shape_const");
+ target->shape(shape_const);
+ target->rank(shape.size());
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ target->dim(i) = shape[i];
+ }
+ target->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ void buildGraph(const std::initializer_list<uint32_t> base_shape,
+ const std::initializer_list<uint32_t> first_shape,
+ const std::initializer_list<uint32_t> second_shape)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->shape(base_shape);
+ input->name("input");
+
+ // Create first reshape.
+ first_reshape = g.nodes()->create<luci::CircleReshape>();
+ first_reshape->tensor(input);
+ first_reshape->name("Reshape");
+ createReshapeConst(first_reshape, first_shape);
+
+ // Create logistic.
+ logistic = g.nodes()->create<luci::CircleLogistic>();
+ logistic->x(first_reshape);
+ logistic->name("logistic");
+ logistic->shape(first_shape);
+ logistic->shape_status(luci::ShapeStatus::VALID);
+
+ // Create second reshape.
+ second_reshape = g.nodes()->create<luci::CircleReshape>();
+ second_reshape->tensor(logistic);
+ second_reshape->name("second_reshape");
+ createReshapeConst(second_reshape, second_shape);
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(second_reshape);
+ output->name("output");
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleReshape *first_reshape = nullptr;
+ luci::CircleLogistic *logistic = nullptr;
+ luci::CircleReshape *second_reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST_F(RemoveUnnecessaryReshapeNet, simple_case)
+{
+ buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 1, 32});
+ luci::RemoveUnnecessaryReshapeNetPass pass;
+
+ ASSERT_TRUE(pass.run(&g));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(&g)))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ count++;
+ }
+ ASSERT_EQ(0, count);
+}
+
+TEST_F(RemoveUnnecessaryReshapeNet, shape_mismatch_NEG)
+{
+ buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 2, 16});
+ luci::RemoveUnnecessaryReshapeNetPass pass;
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
new file mode 100644
index 000000000..741b70956
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h>
+
+namespace
+{
+
+// TODO move to global helper list if needed
+/**
+ * @brief Create a node with `inp` as input from fused activation function `act`
+ */
+luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act)
+{
+ switch (act)
+ {
+ case luci::FusedActFunc::NONE:
+ return inp;
+ case luci::FusedActFunc::RELU:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleRelu>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::RELU6:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleRelu6>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleReluN1To1>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::TANH:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleTanh>();
+ n->x(inp);
+ return n;
+ }
+ case luci::FusedActFunc::SIGN_BIT:
+ {
+ throw std::invalid_argument("no matching node to create from fused activation");
+ }
+ default:
+ throw std::invalid_argument("invalid fused activation");
+ }
+}
+
+/**
+ * Replace Fully Connected with Batched MatMul
+ *
+ * BEFORE
+ *
+ * [Node1] [Node2]
+ * | |
+ * [transpose]? [transpose]?
+ * \ /
+ * [FullyConnected]
+ *
+ * AFTER
+ *
+ * [Node1] [Node2]
+ * \ /
+ * [BatchMatMul] [BiasValue]?
+ * \ /
+ * [Add]?
+ * |
+ * [Activation]?
+ *
+ * Nodes with "?" denote optional elements
+ */
+bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
+{
+ luci::CircleNode *x = nullptr;
+ luci::CircleNode *y = nullptr;
+ luci::CircleNode *b = nullptr;
+ luci::CircleTranspose *ty = nullptr;
+ luci::CircleTranspose *tx = nullptr;
+ bool adj_x = false;
+ bool adj_y = true;
+
+ if (dynamic_cast<luci::CircleConst *>(fc->weights()))
+ return false; // NonConst
+
+ if ((ty = dynamic_cast<luci::CircleTranspose *>(fc->weights()))) // is y a transpose?
+ {
+ adj_y = false;
+ if (dynamic_cast<luci::CircleConst *>(ty->a()))
+ return false;
+ else
+ y = loco::must_cast<luci::CircleNode *>(ty->a());
+ }
+ else
+ { // y is not transpose and not const
+ y = loco::must_cast<luci::CircleNode *>(fc->weights());
+ }
+ if ((tx = dynamic_cast<luci::CircleTranspose *>(fc->input())))
+ {
+ adj_x = true;
+ x = loco::must_cast<luci::CircleNode *>(tx->a());
+ }
+ else
+ {
+ x = loco::must_cast<luci::CircleNode *>(fc->input());
+ }
+
+ b = loco::must_cast<luci::CircleNode *>(fc->bias());
+
+ if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32 ||
+ b->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto name = fc->name();
+ assert(name.length() > 0);
+
+ auto matmul = fc->graph()->nodes()->create<luci::CircleBatchMatMul>();
+ matmul->x(x);
+ matmul->y(y);
+ matmul->adj_x(adj_x);
+ matmul->adj_y(adj_y);
+ matmul->name(name);
+ matmul->dtype(fc->dtype());
+
+ luci::add_origin(matmul, luci::get_origin(fc));
+
+ auto all_zero = [](const luci::CircleConst *c) {
+ bool ac = true;
+ for (uint32_t i = 0; i < c->size<loco::DataType::FLOAT32>() && ac; i++)
+ {
+ ac &= c->at<loco::DataType::FLOAT32>(i) == 0.0f;
+ }
+ return ac;
+ };
+
+ auto bc = dynamic_cast<luci::CircleConst *>(b);
+ if ((nullptr != bc) && !all_zero(bc))
+ {
+ auto bias_add = fc->graph()->nodes()->create<luci::CircleAdd>();
+ bias_add->x(matmul);
+ bias_add->y(b);
+ bias_add->name(fc->name() + "/bias_add");
+ bias_add->dtype(fc->dtype());
+ add_origin(bias_add, get_origin(fc));
+ bias_add->fusedActivationFunction(fc->fusedActivationFunction());
+ loco::replace(fc).with(bias_add);
+ }
+ else
+ {
+ auto n = fromActivation(matmul, fc->fusedActivationFunction());
+ add_origin(n, luci::get_origin(fc));
+ n->name(fc->name() + "fusedActivation");
+ n->dtype(fc->dtype());
+ loco::replace(fc).with(n);
+ }
+
+ return true;
+}
+} // namespace
+
+namespace luci
+{
+
+bool ReplaceNonConstFCWithBatchMatMulPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto fc = dynamic_cast<luci::CircleFullyConnected *>(node))
+ {
+ if (replace_fc_with_matmul(fc))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
new file mode 100644
index 000000000..7606a6125
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM1] [IFM2] [BIAS]
+ * \ | /
+ * [FC]
+ * |
+ * [Res]
+ *
+ * AFTER
+ * [IFM1] [IFM2]
+ * \ |
+ * [BatchMatMul] [BIAS]
+ * \ /
+ * [Add]
+ * |
+ * [Res]
+ *
+ */
+struct FCGraphlet
+{
+public:
+ FCGraphlet() = default;
+ virtual ~FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 r_shape, const float bv)
+ {
+ _tr_y = g->nodes()->create<luci::CircleTranspose>();
+ _tr_y->a(_y);
+ std::vector<int32_t> tr_val = {1, 0};
+ _tr_y->perm(create_const_node(g, loco::DataType::S32, {2}, tr_val));
+
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(_x);
+ _fc->weights(_tr_y);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->shape(r_shape);
+ auto l = _fc->dim(_fc->rank() - 1).value();
+ std::vector<float> bias_val(l, bv);
+ _fc->bias(create_const_node(g, loco::DataType::FLOAT32, {l}, bias_val));
+ _fc->name("fc");
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleTranspose *_tr_y = nullptr;
+ luci::CircleInput *_x = nullptr;
+ luci::CircleInput *_y = nullptr;
+};
+
+struct FCGraph : public TestIsGraphlet<2>, public TestOGraphlet, public FCGraphlet
+{
+ FCGraph() = default;
+ virtual ~FCGraph() = default;
+ void init(const ShapeU32 x_shape, const ShapeU32 y_shape, const ShapeU32 r_shape, const float bv)
+ {
+ TestIsGraphlet<2>::init(g(), {x_shape, y_shape});
+ TestOGraphlet::init(g(), r_shape);
+ _x = input(0);
+ _y = input(1);
+ FCGraphlet::init(g(), r_shape, bv);
+ output()->from(_fc);
+ }
+};
+
+class ReplaceNonConstFCWithBatchMatMulPassTest : public ::testing::Test
+{
+public:
+ FCGraph g;
+ luci::ReplaceNonConstFCWithBatchMatMulPass pass;
+};
+
+} // namespace
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, simple_test)
+{
+ g.init({2, 3}, {2, 3}, {2, 2}, 0.0f);
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto mm = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
+ EXPECT_NE(nullptr, mm);
+}
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test)
+{
+ g.init({2, 3}, {2, 3}, {2, 2}, 1.0f);
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto mm = dynamic_cast<luci::CircleAdd *>(g.output()->from());
+ EXPECT_NE(nullptr, mm);
+}
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, wrong_op_NEG)
+{
+ loco::Graph g;
+
+ auto inp = g.nodes()->create<luci::CircleInput>();
+ auto relu = g.nodes()->create<luci::CircleRelu>();
+ relu->features(inp);
+
+ luci::ReplaceNonConstFCWithBatchMatMulPass pass;
+ auto changed = pass.run(&g);
+
+ EXPECT_EQ(false, changed);
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
new file mode 100644
index 000000000..a65065800
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+namespace
+{
+
+// Input node is const S64
+// Return s32 version of node
+// Return nullptr if s64 value is out of range of s32
+luci::CircleConst *s64_to_s32(luci::CircleConst *node)
+{
+ assert(node);
+ assert(node->dtype() == loco::DataType::S64);
+
+ auto cloned = luci::clone(node);
+ luci::add_origin(cloned, luci::get_origin(node));
+
+ const auto num_elems = node->size<loco::DataType::S64>();
+
+ cloned->dtype(loco::DataType::S32);
+ cloned->size<loco::DataType::S32>(num_elems);
+
+ for (uint32_t i = 0; i < num_elems; i++)
+ {
+ int64_t val = node->at<loco::DataType::S64>(i);
+ if (val < std::numeric_limits<int32_t>::min() or val > std::numeric_limits<int32_t>::max())
+ return nullptr;
+
+ cloned->at<loco::DataType::S32>(i) = static_cast<int32_t>(val);
+ }
+
+ return cloned;
+}
+
+/** BEFORE
+ *
+ * [CircleNode]
+ * \
+ * \ [size_splits] [split_dim]
+ * \ | /
+ *         [CircleCustom(SplitV)]
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * | \
+ * | \ [size_splits] [split_dim]
+ * | \ | /
+ * | \ | /
+ * | \ | /
+ * [CircleCustom(SplitV)] [CircleSplitV]
+ * | |
+ * [CircleCustomOut] [CircleSplitVOut]
+ * |
+ * [CircleNode]
+ */
+bool resolve_splitv(luci::CircleCustom *node)
+{
+ const std::string custom_code = node->custom_code();
+ const std::vector<uint8_t> custom_options = node->custom_options();
+
+ if (custom_code != "SplitV")
+ return false;
+
+ if (node->numInputs() != 3)
+ return false;
+
+ auto size_splits = dynamic_cast<luci::CircleConst *>(node->inputs(1));
+ if (not size_splits)
+ return false;
+
+  // Convert size_splits to S32, because luci-interpreter does not support
+ // S64 size_splits yet
+ // TODO Support S64 size_splits
+ if (size_splits->dtype() == loco::DataType::S64)
+ {
+ size_splits = s64_to_s32(size_splits);
+ if (not size_splits)
+ return false;
+ }
+ if (size_splits->dtype() != loco::DataType::S32)
+ return false;
+
+ auto split_dim = dynamic_cast<luci::CircleConst *>(node->inputs(2));
+ if (not split_dim)
+ return false;
+
+ if (split_dim->dtype() == loco::DataType::S64)
+ {
+ split_dim = s64_to_s32(split_dim);
+ if (not split_dim)
+ return false;
+ }
+ if (split_dim->dtype() != loco::DataType::S32)
+ return false;
+
+ if (size_splits->rank() != 1)
+ return false;
+
+ const auto num_split = size_splits->dim(0).value();
+
+ auto split_v = node->graph()->nodes()->create<luci::CircleSplitV>();
+ split_v->input(node->inputs(0));
+ split_v->size_splits(size_splits);
+ split_v->split_dim(split_dim);
+ split_v->num_split(num_split);
+ split_v->name(node->name());
+ luci::add_origin(split_v, luci::get_origin(node));
+
+ int32_t i = 0;
+ const auto succs = loco::succs(node);
+ for (auto succ : succs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ); // FIX_CALLER_UNLESS
+
+ auto split_v_out = node->graph()->nodes()->create<luci::CircleSplitVOut>();
+ split_v_out->input(split_v);
+ split_v_out->name(node->name() + "_out_" + std::to_string(i));
+ split_v_out->index(i++);
+ luci::add_origin(split_v_out, luci::get_origin(node));
+ loco::replace(custom_out).with(split_v_out);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ResolveCustomOpSplitVPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ if (resolve_splitv(cop))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp
new file mode 100644
index 000000000..e7738aadb
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <gtest/gtest.h>
+
+using namespace luci::test;
+
+namespace
+{
+
+/**
+ * graph having Custom operator SplitV
+ *
+ * [Input] [Const] [Const]
+ * \ | /
+ * [Custom(SplitV)]
+ * / | \
+ * [CustomOut] [CustomOut] [CustomOut]
+ * | | |
+ * [Output] [Output] [Output]
+ */
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // CircleCustom(SplitV)
+ _splitv = g->nodes()->create<luci::CircleCustom>(3, 3);
+ _splitv->custom_code("SplitV");
+ _splitv->shape({1, 2, 2, 192});
+ _splitv->dtype(loco::DataType::FLOAT32);
+ _splitv->name("splitv");
+
+ // CircleConst
+ auto size_splits = g->nodes()->create<luci::CircleConst>();
+ size_splits->dtype(loco::DataType::S64);
+ size_splits->shape({3});
+ size_splits->size<loco::DataType::S64>(3);
+ size_splits->at<loco::DataType::S64>(0) = 32;
+ size_splits->at<loco::DataType::S64>(1) = 32;
+ size_splits->at<loco::DataType::S64>(2) = 128;
+
+ // CircleConst
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(0);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->scalar<loco::DataType::S32>() = 3;
+
+ _splitv->inputs(1, size_splits);
+ _splitv->inputs(2, split_dim);
+
+ // CircleCustomOut
+ _splitv_out1 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out1->shape({1, 2, 2, 32});
+ _splitv_out1->dtype(loco::DataType::FLOAT32);
+ _splitv_out1->index(0);
+ _splitv_out1->input(_splitv);
+
+ // CircleCustomOut
+ _splitv_out2 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out2->shape({1, 2, 2, 32});
+ _splitv_out2->dtype(loco::DataType::FLOAT32);
+ _splitv_out2->index(1);
+ _splitv_out2->input(_splitv);
+
+ // CircleCustomOut
+ _splitv_out3 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out3->shape({1, 2, 2, 128});
+ _splitv_out3->dtype(loco::DataType::FLOAT32);
+ _splitv_out3->index(2);
+ _splitv_out3->input(_splitv);
+ }
+
+public:
+ luci::CircleCustom *splitv() { return _splitv; }
+
+protected:
+ luci::CircleCustom *_splitv = nullptr;
+ luci::CircleCustomOut *_splitv_out1 = nullptr;
+ luci::CircleCustomOut *_splitv_out2 = nullptr;
+ luci::CircleCustomOut *_splitv_out3 = nullptr;
+};
+
+class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIGraphlet::init(g(), {1, 2, 2, 192});
+ TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}});
+ SplitVGraphlet::init(g());
+
+ // connect graph
+ _splitv->inputs(0, input());
+
+ output(0)->from(_splitv_out1);
+ output(1)->from(_splitv_out2);
+ output(2)->from(_splitv_out3);
+ }
+};
+
+class SplitVGraphTest : public ::testing::Test
+{
+public:
+ SplitVGraph g;
+ luci::ResolveCustomOpSplitVPass pass;
+};
+
+} // namespace
+
+TEST_F(SplitVGraphTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto svo_1 = dynamic_cast<luci::CircleSplitVOut *>(g.output(0)->from());
+ EXPECT_NE(nullptr, svo_1);
+ auto svo_2 = dynamic_cast<luci::CircleSplitVOut *>(g.output(1)->from());
+ EXPECT_NE(nullptr, svo_2);
+ auto svo_3 = dynamic_cast<luci::CircleSplitVOut *>(g.output(2)->from());
+ EXPECT_NE(nullptr, svo_3);
+
+ auto sv = dynamic_cast<luci::CircleSplitV *>(svo_1->input());
+ EXPECT_NE(nullptr, sv);
+ sv = dynamic_cast<luci::CircleSplitV *>(svo_2->input());
+ EXPECT_NE(nullptr, sv);
+ sv = dynamic_cast<luci::CircleSplitV *>(svo_3->input());
+ EXPECT_NE(nullptr, sv);
+
+ auto size_splits = loco::must_cast<luci::CircleConst *>(sv->size_splits());
+ EXPECT_EQ(loco::DataType::S32, size_splits->dtype());
+ EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(0));
+ EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(1));
+ EXPECT_EQ(128, size_splits->at<loco::DataType::S32>(2));
+
+ auto split_dim = loco::must_cast<luci::CircleConst *>(sv->split_dim());
+ EXPECT_EQ(loco::DataType::S32, split_dim->dtype());
+ EXPECT_EQ(3, split_dim->scalar<loco::DataType::S32>());
+}
+
+TEST_F(SplitVGraphTest, wrong_op_NEG)
+{
+ g.init();
+
+ g.splitv()->custom_code("AddV2");
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
index 442183c18..408e6b8d9 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
@@ -197,6 +197,13 @@ private:
return true;
}
+ bool visit(const luci::CircleReduceMax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
bool visit(const luci::CircleRelu *node)
{
RETURN_FALSE_UNLESS(is_lwq(node));
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
index 4e1c062c0..cf86acabe 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
@@ -302,6 +302,15 @@ bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *nod
}
template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReduceMax *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node)
{
return group_has_type(node, Qtype);
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
index ff1acbd6f..789d3c7cd 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeType.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
@@ -104,6 +104,7 @@ private:
bool visit(const luci::CirclePadV2 *node);
bool visit(const luci::CirclePRelu *node);
bool visit(const luci::CirclePow *node);
+ bool visit(const luci::CircleReduceMax *node);
bool visit(const luci::CircleRelu *node);
bool visit(const luci::CircleReshape *node);
bool visit(const luci::CircleResizeBilinear *node);
diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp
new file mode 100644
index 000000000..72b7d60ff
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// codes under namespace sparsity referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+#include "SparsityFormatConverter.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace sparsity
+{
+
+namespace
+{
+
+uint64_t GetFlattenedIndex(const std::vector<int> &indices, const std::vector<int> &shape)
+{
+ uint64_t index = 0;
+ int sub_elements = 1;
+ for (int i = shape.size() - 1; i >= 0; i--)
+ {
+ index += indices[i] * sub_elements;
+ sub_elements *= shape[i];
+ }
+ return index;
+}
+
+std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray *int_array)
+{
+ std::vector<int> values;
+ if (!int_array)
+ {
+ return values;
+ }
+
+ values.resize(int_array->size);
+ for (int i = 0; i < int_array->size; i++)
+ {
+ values[i] = int_array->data[i];
+ }
+
+ return values;
+}
+
+} // namespace
+
+template <typename T>
+FormatConverter<T>::FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity)
+{
+ auto traversal_order = TfLiteIntArrayToVector(sparsity.traversal_order);
+ auto block_map = TfLiteIntArrayToVector(sparsity.block_map);
+
+ std::vector<TfLiteDimensionType> format(sparsity.dim_metadata_size);
+ std::vector<int> dense_size(sparsity.dim_metadata_size);
+ std::vector<std::vector<int>> segments(sparsity.dim_metadata_size);
+ std::vector<std::vector<int>> indices(sparsity.dim_metadata_size);
+ for (int i = 0; i < sparsity.dim_metadata_size; i++)
+ {
+ format[i] = sparsity.dim_metadata[i].format;
+ dense_size[i] = sparsity.dim_metadata[i].dense_size;
+ segments[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_segments);
+ indices[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_indices);
+ }
+
+ InitSparseToDenseConverter(shape, std::move(traversal_order), std::move(format),
+ std::move(dense_size), std::move(segments), std::move(indices),
+ std::move(block_map));
+}
+
+template <typename T>
+void FormatConverter<T>::InitSparseToDenseConverter(
+ std::vector<int> shape, std::vector<int> traversal_order, std::vector<TfLiteDimensionType> format,
+ std::vector<int> dense_size, std::vector<std::vector<int>> segments,
+ std::vector<std::vector<int>> indices, std::vector<int> block_map)
+{
+ dense_shape_ = std::move(shape);
+ traversal_order_ = std::move(traversal_order);
+ block_map_ = std::move(block_map);
+ format_ = std::move(format);
+
+ dense_size_ = 1;
+ for (size_t i = 0; i < dense_shape_.size(); i++)
+ {
+ dense_size_ *= dense_shape_[i];
+ }
+
+ dim_metadata_.resize(2 * format_.size());
+ for (size_t i = 0; i < format_.size(); i++)
+ {
+ if (format_[i] == kTfLiteDimDense)
+ {
+ dim_metadata_[2 * i] = {dense_size[i]};
+ }
+ else
+ {
+ dim_metadata_[2 * i] = std::move(segments[i]);
+ dim_metadata_[2 * i + 1] = std::move(indices[i]);
+ }
+ }
+
+ int original_rank = dense_shape_.size();
+ int block_dim = 0;
+
+ blocked_shape_.resize(original_rank);
+ block_size_.resize(block_map_.size());
+ for (int i = 0; i < original_rank; i++)
+ {
+ if (block_dim < (int)block_map_.size() && block_map_[block_dim] == i)
+ {
+ if (original_rank + block_dim < (int)traversal_order_.size())
+ {
+ int orig_dim = traversal_order_[original_rank + block_dim];
+ block_size_[block_dim] = dense_size[orig_dim];
+ blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim];
+ block_dim++;
+ }
+ }
+ else
+ {
+ blocked_shape_[i] = dense_shape_[i];
+ }
+ }
+}
+
+template <typename T>
+void FormatConverter<T>::Populate(const T *src_data, std::vector<int> indices, int level,
+ int prev_idx, int *src_data_ptr, T *dest_data)
+{
+ if (static_cast<size_t>(level) == indices.size())
+ {
+ int orig_rank = dense_shape_.size();
+ std::vector<int> orig_idx;
+ orig_idx.resize(orig_rank);
+ int i = 0;
+ for (; static_cast<size_t>(i) < orig_idx.size(); i++)
+ {
+ int orig_dim = traversal_order_[i];
+ orig_idx[orig_dim] = indices[i];
+ }
+
+ for (; static_cast<size_t>(i) < indices.size(); i++)
+ {
+ const int block_idx = traversal_order_[i] - orig_rank;
+ const int orig_dim = block_map_[block_idx];
+ orig_idx[orig_dim] = orig_idx[orig_dim] * block_size_[block_idx] + indices[i];
+ }
+
+ dest_data[GetFlattenedIndex(orig_idx, dense_shape_)] = src_data[*src_data_ptr];
+
+ *src_data_ptr = *src_data_ptr + 1;
+ return;
+ }
+
+ const int metadata_idx = 2 * level;
+ const int shape_of_level = dim_metadata_[metadata_idx][0];
+ if (format_[level] == kTfLiteDimDense)
+ {
+ for (int i = 0; i < shape_of_level; i++)
+ {
+ indices[level] = i;
+ Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i, src_data_ptr,
+ dest_data);
+ }
+ }
+ else if (static_cast<size_t>(prev_idx + 1) < dim_metadata_[metadata_idx].size())
+ {
+ const auto &array_segments = dim_metadata_[metadata_idx];
+ const auto &array_indices = dim_metadata_[metadata_idx + 1];
+ for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1]; i++)
+ {
+ if (static_cast<size_t>(i) < array_indices.size() &&
+ static_cast<size_t>(level) < indices.size())
+ {
+ indices[level] = array_indices[i];
+ Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data);
+ }
+ }
+ }
+}
+
+template <typename T> bool FormatConverter<T>::SparseToDense(const T *src_data)
+{
+ data_.resize(dense_size_);
+ std::fill(data_.begin(), data_.end(), T(0));
+
+ int total_rank = traversal_order_.size();
+ int src_data_ptr = 0;
+ std::vector<int> indices(total_rank);
+ Populate(src_data, indices, 0, 0, &src_data_ptr, data_.data());
+
+ return true;
+}
+
+template class FormatConverter<float>;
+template class FormatConverter<uint16_t>;
+
+} // namespace sparsity
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt)
+{
+ switch (dt)
+ {
+ case luci::DimensionType::DENSE:
+ return sparsity::TfLiteDimensionType::kTfLiteDimDense;
+ case luci::DimensionType::SPARSE_CSR:
+ return sparsity::TfLiteDimensionType::kTfLiteDimSparseCSR;
+ }
+ return sparsity::TfLiteDimensionType::kTfLiteDimDense;
+}
+
+sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data)
+{
+ auto type = data.type();
+ switch (type)
+ {
+ case luci::SparseIndexVectorType::NONE:
+ {
+ std::vector<int32_t> empty;
+ return makeTfLiteArray(empty);
+ }
+ case luci::SparseIndexVectorType::I32:
+ return makeTfLiteArray<int32_t>(*data.as_int32_vector());
+ case luci::SparseIndexVectorType::U16:
+ return makeTfLiteArray<uint16_t>(*data.as_uint16_vector());
+ case luci::SparseIndexVectorType::U8:
+ return makeTfLiteArray<uint8_t>(*data.as_uint8_vector());
+ default:
+ INTERNAL_EXN_V("unsupported SparseIndexVectorType", oops::to_uint32(type));
+ }
+}
+
+sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp)
+{
+ sparsity::TfLiteSparsity tflsp;
+ tflsp.traversal_order = makeTfLiteArray(sp->traversal_order);
+ tflsp.block_map = makeTfLiteArray(sp->block_map);
+ tflsp.dim_metadata = makeTfLiteDimensionMetadata(sp->dim_metadata);
+ tflsp.dim_metadata_size = sp->dim_metadata.size();
+ return tflsp;
+}
+
+template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data)
+{
+ size_t cn = data.size();
+ size_t sz = 1 + data.size();
+ sparsity::TfLiteIntArray *sp = (sparsity::TfLiteIntArray *)(new int[sz]);
+ sp->size = cn;
+ for (size_t i = 0; i < cn; ++i)
+ {
+ sp->data[i] = data[i];
+ }
+ return sp;
+}
+
+sparsity::TfLiteDimensionMetadata *
+makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data)
+{
+ size_t cn = data.size();
+ sparsity::TfLiteDimensionMetadata *tfldm = new sparsity::TfLiteDimensionMetadata[cn];
+
+ for (size_t i = 0; i < cn; ++i)
+ {
+ tfldm[i].format = to_tflite_sparsity(data[i].format());
+ tfldm[i].dense_size = data[i].dense_size();
+ tfldm[i].array_segments = to_tflite_sparsity(data[i].array_segments());
+ tfldm[i].array_indices = to_tflite_sparsity(data[i].array_indices());
+ }
+
+ return tfldm;
+}
+
+void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp)
+{
+ assert(tflsp.traversal_order);
+ assert(tflsp.block_map);
+ delete[] tflsp.traversal_order;
+ delete[] tflsp.block_map;
+
+ for (int i = 0; i < tflsp.dim_metadata_size; ++i)
+ {
+ assert(tflsp.dim_metadata[i].array_segments);
+ assert(tflsp.dim_metadata[i].array_indices);
+ delete[] tflsp.dim_metadata[i].array_segments;
+ delete[] tflsp.dim_metadata[i].array_indices;
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.h b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h
new file mode 100644
index 000000000..fcd9bbcd0
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+#define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+
+#include <cstdint>
+#include <vector>
+
+// codes under namespace sparsity referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType
+{
+ kTfLiteDimDense = 0,
+ kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
+// indices
+typedef struct TfLiteIntArray
+{
+ int size;
+ int data[];
+} TfLiteIntArray;
+
+// Metadata to encode each dimension in a sparse tensor.
+typedef struct TfLiteDimensionMetadata
+{
+ TfLiteDimensionType format;
+ int dense_size;
+ TfLiteIntArray *array_segments;
+ TfLiteIntArray *array_indices;
+} TfLiteDimensionMetadata;
+
+// Parameters used to encode a sparse tensor. For detailed explanation of each
+// field please refer to lite/schema/schema.fbs.
+typedef struct TfLiteSparsity
+{
+ TfLiteIntArray *traversal_order;
+ TfLiteIntArray *block_map;
+ TfLiteDimensionMetadata *dim_metadata;
+ int dim_metadata_size;
+} TfLiteSparsity;
+
+// A converter that keeps an internal representation of sparse tensor parameters
+// and converts tensors between dense and sparse formats.
+template <typename T> class FormatConverter
+{
+public:
+ /* Creates a sparse to dense converter.
+ * @param shape Shape of the target dense tensor.
+ * @param sparsity Sparsity parameter of the sparse TfLiteTensor.
+ */
+ FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity);
+
+ const std::vector<T> &GetData() { return data_; }
+ const std::vector<std::vector<int>> &GetDimMetadata() { return dim_metadata_; }
+
+ bool SparseToDense(const T *src_data);
+
+private:
+ // Helper function for initializing this converter for sparse to dense
+ // conversion.
+ void InitSparseToDenseConverter(std::vector<int> shape, std::vector<int> traversal_order,
+ std::vector<TfLiteDimensionType> format,
+ std::vector<int> dense_size,
+ std::vector<std::vector<int>> segments,
+ std::vector<std::vector<int>> indices,
+ std::vector<int> block_map);
+
+ void Populate(const T *src_data, std::vector<int> indices, int level, int prev_idx,
+ int *src_data_ptr, T *dest_data);
+
+private:
+ std::vector<int> dense_shape_;
+ std::vector<int> blocked_shape_;
+ size_t dense_size_;
+ std::vector<int> traversal_order_;
+ std::vector<TfLiteDimensionType> format_;
+ std::vector<int> block_size_;
+ std::vector<int> block_map_;
+ std::vector<std::vector<int>> dim_metadata_;
+ std::vector<T> data_;
+};
+
+extern template class FormatConverter<float>;
+extern template class FormatConverter<uint16_t>;
+
+} // namespace sparsity
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt);
+sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data);
+sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp);
+
+template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data);
+sparsity::TfLiteDimensionMetadata *
+makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data);
+
+void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake
index e896188be..0a5e6a58b 100644
--- a/compiler/luci/requires.cmake
+++ b/compiler/luci/requires.cmake
@@ -10,4 +10,5 @@ require("oops")
require("hermes")
require("hermes-std")
require("tflchef")
+require("circlechef")
require("tflite2circle")
diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h
index 99e4561b3..95f06db4c 100644
--- a/compiler/luci/service/src/CircleCloneNode.h
+++ b/compiler/luci/service/src/CircleCloneNode.h
@@ -72,6 +72,7 @@ public:
CloneNodeLet(loco::Graph *graph) : _graph(graph){};
public:
+ luci::CircleNode *visit(const luci::CircleDensify *) final;
luci::CircleNode *visit(const luci::CircleDepthToSpace *) final;
luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final;
luci::CircleNode *visit(const luci::CircleDequantize *) final;
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
index 9d156f3e2..a368faef4 100644
--- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -204,6 +204,7 @@ template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
return loco::NodeShape{inputs_shape}; \
}
+DECLARE_USE_SINGLE(input);
DECLARE_USE_SINGLE(inputs);
DECLARE_USE_SINGLE(x);
DECLARE_USE_SINGLE(logits);
@@ -258,10 +259,10 @@ loco::NodeShape infer_add_n(const luci::CircleAddN *node)
return loco::NodeShape{shape};
}
-loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+template <class CIRCLENODE> loco::NodeShape infer_arg_maxmin(const CIRCLENODE *node)
{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).template as<loco::TensorShape>();
int64_t select_axis = 0;
{
@@ -271,55 +272,19 @@ loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
// Support S32 for now.
auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMax");
+ "Only support int32 CircleConst for CircleArgMax/CircleArgMin");
if (const_shape_node->rank() > 1)
INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
oops::to_uint32(const_shape_node->rank()));
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
-
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
- {
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
+ select_axis = const_shape_node->template scalar<loco::DataType::S32>();
}
- return loco::NodeShape{shape_output};
-}
-
-loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
-{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
-
- int64_t select_axis = 0;
- {
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMin");
-
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
+
+ if (select_axis < 0)
+ select_axis += input_shape.rank();
// NOTE select_axis is removed
loco::TensorShape shape_output;
@@ -1180,45 +1145,17 @@ loco::NodeShape infer_reshape(const luci::CircleReshape *node)
return loco::NodeShape{output_shape};
}
-loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+template <class CIRCLENODE> loco::NodeShape infer_resize_type(const CIRCLENODE *node)
{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
-
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
-
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
-
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
-
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
-
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
-
- return loco::NodeShape{output_shape};
-}
-
-loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
-{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
+ INTERNAL_EXN("Expected input to have rank 4");
auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
+ INTERNAL_EXN("Only S32 datatype is supported for size");
if (const_node->rank() != 1)
INTERNAL_EXN("Expected size tensor of rank 1");
@@ -1229,8 +1166,8 @@ loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNei
loco::TensorShape output_shape;
output_shape.rank(4);
output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(1) = const_node->template at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->template at<loco::DataType::S32>(1);
output_shape.dim(3) = input_shape.dim(3);
return loco::NodeShape{output_shape};
@@ -2080,9 +2017,9 @@ public:
loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
- loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
+ loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_maxmin(node); }
- loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
+ loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_maxmin(node); }
loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
{
@@ -2119,6 +2056,8 @@ public:
loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+ loco::NodeShape visit(const luci::CircleDensify *node) final { return use_input(node); }
+
loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
{
return infer_depth_to_space(node);
@@ -2348,12 +2287,12 @@ public:
loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
{
- return infer_resize_bilinear(node);
+ return infer_resize_type(node);
}
loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
{
- return infer_resize_nearest_neighbor(node);
+ return infer_resize_type(node);
}
loco::NodeShape visit(const luci::CircleReverseSequence *node) final
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index 438c4a364..7616390ae 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -102,6 +102,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return node->dtype();
}
+ loco::DataType visit(const luci::CircleDensify *node) final
+ {
+ return luci::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CircleDepthToSpace *node) final
{
return luci::dtype_get(node->input());
diff --git a/compiler/luci/service/src/Nodes/CircleDensify.cpp b/compiler/luci/service/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..a0d15b6c7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDensify *)
+{
+ return _graph->nodes()->create<luci::CircleDensify>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDensify.test.cpp b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..d0f32c1a2
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Densify)
+{
+ auto g = loco::make_graph();
+ auto node_densify = g->nodes()->create<luci::CircleDensify>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_densify, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_densify = dynamic_cast<luci::CircleDensify *>(cloned);
+ ASSERT_NE(nullptr, cloned_densify);
+}
diff --git a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
index c5864f938..77135cca0 100644
--- a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
+++ b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
@@ -24,16 +24,22 @@
#include <loco/IR/NodeShape.h>
#include <oops/InternalExn.h>
+#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
+// code referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/strided_slice.cc
+// tensorflow/lite/kernels/internal/strided_slice_logic.h
+
namespace
{
-// This Op only supports 1-4D cases and since we use the reference 4D
+// This Op only supports 1-5D cases and since we use the reference 4D
// implementation, the 1-3D tensors are mapped to 4D.
-const int kMaxDim = 4;
+const int kMaxDim = 5;
const loco::DataType S32 = loco::DataType::S32;
@@ -42,18 +48,47 @@ using int16 = int16_t;
struct StridedSliceParams
{
- int8 start_indices_count;
+ int8 start_indices_count = 0;
int16 start_indices[kMaxDim];
- int8 stop_indices_count;
+ int8 stop_indices_count = 0;
int16 stop_indices[kMaxDim];
- int8 strides_count;
+ int8 strides_count = 0;
int16 strides[kMaxDim];
- int16 begin_mask;
- int16 ellipsis_mask;
- int16 end_mask;
- int16 new_axis_mask;
- int16 shrink_axis_mask;
+ int16 begin_mask = 0;
+ int16 ellipsis_mask = 0;
+ int16 end_mask = 0;
+ int16 new_axis_mask = 0;
+ int16 shrink_axis_mask = 0;
+};
+
+struct StridedSliceContext
+{
+ StridedSliceContext(const luci::CircleStridedSlice *node)
+ {
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ input = loco::must_cast<luci::CircleNode *>(node->input());
+ begin = loco::must_cast<luci::CircleConst *>(node->begin());
+ end = loco::must_cast<luci::CircleConst *>(node->end());
+ strides = loco::must_cast<luci::CircleConst *>(node->strides());
+
+ loco::TensorShape input_shape = luci::shape_get(input).as<loco::TensorShape>();
+ input_dims = input_shape.rank();
+ }
+ StridedSliceParams params;
+ luci::CircleNode *input = nullptr;
+ luci::CircleConst *begin = nullptr;
+ luci::CircleConst *end = nullptr;
+ luci::CircleConst *strides = nullptr;
+
+ // Equivalent input shape after adding axis according to new_axis_mask.
+ loco::TensorShape effective_input_shape;
+ uint32_t input_dims = 0;
};
// Use until std::clamp() is available from C++17.
@@ -70,8 +105,8 @@ inline int Clamp(const int32_t v, const int32_t lo, const int32_t hi)
// Return the index for the first element along that axis. This index will be a
// positive integer between [0, axis_size - 1] that can be used to index
// directly into the data.
-inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
- uint32_t axis)
+inline int32_t StartForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
+ uint32_t axis)
{
const auto begin_mask = params.begin_mask;
const auto *start_indices = params.start_indices;
@@ -108,7 +143,16 @@ inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShap
}
// Clamping
- start = Clamp(start, 0, axis_size - 1);
+ if (strides[axis] > 0)
+ {
+ // Forward iteration
+ start = Clamp(start, 0, axis_size);
+ }
+ else
+ {
+ // Backward iteration
+ start = Clamp(start, -1, axis_size - 1);
+ }
return start;
}
@@ -118,14 +162,14 @@ inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShap
// element. ie. So if you were iterating through all elements of a 1D array of
// size 4, this function would return 4 as the stop, because it is one past the
// "real" indices of 0, 1, 2 & 3.
-inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
- int axis, int start_for_axis)
+inline int32_t StopForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
+ int32_t axis, int32_t start_for_axis)
{
const auto end_mask = params.end_mask;
const auto shrink_axis_mask = params.shrink_axis_mask;
const auto *stop_indices = params.stop_indices;
const auto *strides = params.strides;
- const int axis_size = static_cast<int32_t>(input_shape.dim(axis).value());
+ const int32_t axis_size = static_cast<int32_t>(input_shape.dim(axis).value());
if (axis_size == 0)
{
return 0;
@@ -141,7 +185,7 @@ inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape
// already been adjusted for negative indices.
if (shrink_axis)
{
- stop = start_for_axis + 1;
+ return start_for_axis + 1;
}
// end_mask override
@@ -183,37 +227,125 @@ inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape
return stop;
}
-StridedSliceParams BuildStridedSliceParams(const luci::CircleStridedSlice *node)
+StridedSliceParams BuildStridedSliceParams(StridedSliceContext *op_context)
{
StridedSliceParams op_params;
- if (kMaxDim < node->rank())
+ // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks
+ // are processed here to update begin_mask, end_mask and the index range.
+ op_params.begin_mask = 0;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = 0;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = 0;
+
+ // Count indexes where the new_axis_mask is set but the ellipsis_mask is not.
+ loco::TensorShape begin_shape = luci::shape_get(op_context->begin).as<loco::TensorShape>();
+ const uint32_t begin_count = begin_shape.dim(0).value();
+ uint32_t num_add_axis = 0;
+ for (uint32_t i = 0; i < begin_count; ++i)
{
- INTERNAL_EXN_V("Cannot support StridedSlice rank > ", kMaxDim);
+ if (!((1 << i) & op_context->params.ellipsis_mask) &&
+ ((1 << i) & op_context->params.new_axis_mask))
+ {
+ num_add_axis++;
+ }
}
- auto begin_node = loco::must_cast<luci::CircleConst *>(node->begin());
- auto end_node = loco::must_cast<luci::CircleConst *>(node->end());
- auto strides_node = loco::must_cast<luci::CircleConst *>(node->strides());
+ // Calculate the dims of input after adding new axises.
+ const uint32_t effective_dims = op_context->input_dims + num_add_axis;
+
+ // If begin, end and strides are not fully provided, it means Ellipsis should
+ // be expanded to multiple dimensions (Ex: for spec [Ellipsis, 2] on a 3D
+ // input, the Ellipsis should be applied for the first 2 dimensions). Besides,
+ // If the new_axis_mask and the ellipsis_mask are set at the same index, the
+ // new_axis_mask will have no effect.
+ int32_t effective_ellipsis_mask = 0, effective_new_axis_mask = 0;
+ uint32_t ellipsis_start_idx = effective_dims, expanded_ellipsis = 0;
+ for (uint32_t i = 0; i < effective_dims;)
+ {
+ if ((1 << i) & op_context->params.ellipsis_mask)
+ {
+ ellipsis_start_idx = i;
+ uint32_t ellipsis_end_idx =
+ std::max(i + 1, std::min(i + 1 + num_add_axis + op_context->input_dims - begin_count,
+ effective_dims));
+ expanded_ellipsis = ellipsis_end_idx - ellipsis_start_idx - 1;
+
+ // Set bit for effective_ellipsis_mask.
+ for (; i < ellipsis_end_idx; ++i)
+ {
+ effective_ellipsis_mask |= (1 << i);
+ }
+ continue;
+ }
- uint32_t dims_count = begin_node->size<S32>();
+ if ((1 << (i - expanded_ellipsis)) & op_context->params.new_axis_mask)
+ {
+ effective_new_axis_mask |= (1 << i);
+ }
+ ++i;
+ }
- op_params.start_indices_count = dims_count;
- op_params.stop_indices_count = dims_count;
- op_params.strides_count = dims_count;
+ // Calculate effective_input_shape and its corresponding begin, end, strides.
+ loco::TensorShape input_shape = luci::shape_get(op_context->input).as<loco::TensorShape>();
+ uint32_t added_ellipsis = 0, added_axises = 0;
+ op_context->effective_input_shape.rank(effective_dims);
- for (uint32_t i = 0; i < dims_count; ++i)
+ for (uint32_t i = 0; i < effective_dims; ++i)
{
- op_params.start_indices[i] = begin_node->at<S32>(i);
- op_params.stop_indices[i] = end_node->at<S32>(i);
- op_params.strides[i] = strides_node->at<S32>(i);
+ if ((1 << i) & effective_ellipsis_mask)
+ {
+ // If ellipsis_mask, set the begin_mask and end_mask at that index.
+ added_ellipsis = std::max(0u, i - ellipsis_start_idx);
+ op_params.begin_mask |= (1 << i);
+ op_params.end_mask |= (1 << i);
+ op_params.strides[i] = 1;
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
+ else if ((1 << i) & effective_new_axis_mask)
+ {
+ // If new_axis_mask is set, it is equivalent to adding a new dim of 1 to
+ // input tensor. Store added shape to effective_input_shape.
+ op_params.start_indices[i] = 0;
+ op_params.stop_indices[i] = 1;
+ op_params.strides[i] = 1;
+ op_context->effective_input_shape.dim(i) = loco::Dimension(1);
+ added_axises++;
+ }
+ else if (i >= begin_count + expanded_ellipsis)
+ {
+ op_params.start_indices[i] = 0;
+ op_params.stop_indices[i] = 0;
+ op_params.strides[i] = 1;
+ op_params.begin_mask |= (1 << i);
+ op_params.end_mask |= (1 << i);
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
+ else
+ {
+ const uint32_t orig_idx = i - added_ellipsis;
+ op_params.start_indices[i] = op_context->begin->at<S32>(orig_idx);
+ op_params.stop_indices[i] = op_context->end->at<S32>(orig_idx);
+ op_params.strides[i] = op_context->strides->at<S32>(orig_idx);
+ if (op_context->params.begin_mask & (1 << orig_idx))
+ {
+ op_params.begin_mask |= (1 << i);
+ }
+ if (op_context->params.end_mask & (1 << orig_idx))
+ {
+ op_params.end_mask |= (1 << i);
+ }
+ if (op_context->params.shrink_axis_mask & (1 << orig_idx))
+ {
+ op_params.shrink_axis_mask |= (1 << i);
+ }
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
}
-
- op_params.begin_mask = node->begin_mask();
- op_params.ellipsis_mask = 0;
- op_params.end_mask = node->end_mask();
- op_params.new_axis_mask = 0;
- op_params.shrink_axis_mask = node->shrink_axis_mask();
+ op_params.start_indices_count = effective_dims;
+ op_params.stop_indices_count = effective_dims;
+ op_params.strides_count = effective_dims;
return op_params;
}
@@ -241,55 +373,54 @@ loco::TensorShape infer_output_shape(const CircleStridedSlice *node)
LUCI_ASSERT(end_node->dtype() == S32, "Only support S32 for end_node");
LUCI_ASSERT(strides_node->dtype() == S32, "Only support S32 for strides_node");
- assert(node->ellipsis_mask() == 0);
- assert(node->new_axis_mask() == 0);
+ LUCI_ASSERT(begin_node->rank() == 1, "Only support rank 1 for begin_node");
+ LUCI_ASSERT(end_node->rank() == 1, "Only support rank 1 for end_node");
+ LUCI_ASSERT(strides_node->rank() == 1, "Only support rank 1 for strides_node");
- auto op_params = BuildStridedSliceParams(node);
loco::TensorShape input_shape = luci::shape_get(input_node).as<loco::TensorShape>();
- uint32_t num_input_axes = input_shape.rank();
- assert(begin_node->size<S32>() <= num_input_axes);
- assert(end_node->size<S32>() <= num_input_axes);
- assert(strides_node->size<S32>() <= num_input_axes);
- for (uint32_t i = 0; i < strides_node->size<S32>(); i++)
- {
- LUCI_ASSERT(strides_node->at<S32>(i) != 0, "Stride value has to be non-zero");
- }
+ assert(begin_node->size<S32>() <= input_shape.rank());
+ assert(end_node->size<S32>() <= input_shape.rank());
+ assert(strides_node->size<S32>() <= input_shape.rank());
- uint32_t shape_size = 0;
- std::array<int32_t, 16> output_shape_data;
+ StridedSliceContext op_context(node);
+ auto op_params = BuildStridedSliceParams(&op_context);
+ auto effective_input_shape = op_context.effective_input_shape;
+ std::vector<int32_t> output_shape_vector;
- for (uint32_t idx = 0; idx < num_input_axes; ++idx)
+ for (int32_t idx = effective_input_shape.rank() - 1; idx >= 0; --idx)
{
- int32_t begin = StartForAxis(op_params, input_shape, idx);
- int32_t end = StopForAxis(op_params, input_shape, idx, begin);
- if (end < 0)
- end = input_shape.dim(idx).value() + end + 1;
+ int32_t stride = op_params.strides[idx];
+ LUCI_ASSERT(stride != 0, "stride value has to be non-zero");
- // This is valid for both positive and negative strides
- int32_t stride = strides_node->at<S32>(idx);
- int32_t dim_shape = std::ceil(static_cast<float>(end - begin) / stride);
- assert(dim_shape > 0);
+ int32_t begin = StartForAxis(op_params, effective_input_shape, idx);
+ int32_t end = StopForAxis(op_params, effective_input_shape, idx, begin);
// When shrinking an axis, the end position does not matter (and can be
// incorrect when negative indexing is used, see Issue #19260). Always use
// begin + 1 to generate a length 1 slice, since begin has
- // already been adjusted for negative indices by StartForAxis.
- const bool shrink_axis = node->shrink_axis_mask() & (1 << idx);
+// already been adjusted for negative indices by StartForAxis.
+ const bool shrink_axis = op_params.shrink_axis_mask & (1 << idx);
if (shrink_axis)
{
- assert(dim_shape == 1);
+ end = begin + 1;
}
- else
+
+ // This is valid for both positive and negative strides
+ int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
{
- output_shape_data[shape_size++] = dim_shape;
+ output_shape_vector.push_back(dim_shape);
}
}
+ auto shape_size = output_shape_vector.size();
output_shape.rank(shape_size);
for (uint32_t idx = 0; idx < shape_size; ++idx)
{
- output_shape.dim(idx) = output_shape_data[idx];
+ // reverse copy
+ output_shape.dim(idx) = output_shape_vector.at(shape_size - 1u - idx);
}
return output_shape;
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
index 94e723f21..09a25ff08 100644
--- a/compiler/luci/tests/test.lst
+++ b/compiler/luci/tests/test.lst
@@ -39,6 +39,7 @@ addread(Conv2D_003)
addread(Conv2D_U8_000)
addread(Conv2D_U8_001)
addread(Cos_000)
+addread(Densify_000)
addread(DepthToSpace_000)
addread(DepthwiseConv2D_000)
addread(DepthwiseConv2D_U8_000)
@@ -265,6 +266,7 @@ addwrite(Conv2D_003)
addwrite(Conv2D_U8_000)
addwrite(Conv2D_U8_001)
addwrite(Cos_000)
+addwrite(Densify_000)
addwrite(DepthToSpace_000)
addwrite(DepthwiseConv2D_000)
addwrite(DepthwiseConv2D_U8_000)
diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h
index d3ffc23e5..7a1ba2b2f 100644
--- a/compiler/mio-circle04/include/mio_circle/Helper.h
+++ b/compiler/mio-circle04/include/mio_circle/Helper.h
@@ -19,6 +19,8 @@
#include <mio/circle/schema_generated.h>
+#include <vector>
+
namespace mio
{
namespace circle
@@ -31,6 +33,21 @@ std::string opcode_name(const ::circle::OperatorCode *opcode);
const char *tensor_type(const ::circle::Tensor *tensor);
const char *tensor_name(const ::circle::Tensor *tensor);
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ if (flat_array == nullptr)
+ {
+ throw std::runtime_error("flat array is nullptr");
+ }
+
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
} // namespace circle
} // namespace mio
diff --git a/compiler/mio-circle04/include/mio_circle/Reader.h b/compiler/mio-circle04/include/mio_circle/Reader.h
new file mode 100644
index 000000000..630646732
--- /dev/null
+++ b/compiler/mio-circle04/include/mio_circle/Reader.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE04_READER_H__
+#define __MIO_CIRCLE04_READER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+// NOTE Reader class originated from circledump and for circle-tensordump
+// where this class has more work to be done for stability
+// as the tools are for developers not customers.
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class Reader
+{
+private:
+ using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SubGraph>>;
+ using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Buffer>>;
+ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Tensor>>;
+ using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Metadata>>;
+ using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SignatureDef>>;
+
+public:
+ Reader(const ::circle::Model *model);
+
+ Reader() = delete;
+
+public:
+ uint32_t version() const { return _version; }
+
+ const std::vector<const ::circle::OperatorCode *> &opcodes() { return _op_codes; }
+ const CircleBuffers_t *buffers() { return _buffers; }
+ const CircleTensors_t *tensors() { return _tensors; }
+ const CircleOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+ const ::circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
+ const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+ ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const;
+ std::string opcode_name(const ::circle::Operator *op) const;
+ std::vector<int32_t> outputs(const ::circle::Operator *op) const;
+ std::string tensor_name(const ::circle::Tensor *tensor) const;
+ std::string tensor_dtype(const ::circle::Tensor *tensor) const;
+
+public:
+ bool select_subgraph(uint32_t subgraph);
+ const std::string &subgraph_name(void) const { return _subgraph_name; }
+ uint32_t subgraph_index(void) const { return _subgraph_index; }
+
+private:
+ uint32_t _version;
+
+ const CircleSubGraphs_t *_subgraphs{nullptr};
+ const CircleBuffers_t *_buffers{nullptr};
+ const CircleTensors_t *_tensors{nullptr};
+ const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
+ const CircleSignatureDef_t *_signature_defs{nullptr};
+
+ uint32_t _subgraph_index = 0;
+ std::string _subgraph_name;
+ std::vector<const ::circle::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+ ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST;
+};
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE04_READER_H__
diff --git a/compiler/mio-circle04/src/Reader.cpp b/compiler/mio-circle04/src/Reader.cpp
new file mode 100644
index 000000000..880ffaec8
--- /dev/null
+++ b/compiler/mio-circle04/src/Reader.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+#include <string>
+
+namespace mio
+{
+namespace circle
+{
+
+Reader::Reader(const ::circle::Model *model)
+{
+ if (model == nullptr)
+ {
+ throw std::runtime_error("Invalid model");
+ }
+
+ _version = model->version();
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+ _metadata = model->metadata();
+ _signature_defs = model->signature_defs();
+
+ auto opcodes = model->operator_codes();
+ for (const ::circle::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ if (buff_data != nullptr)
+ {
+ *buff_data = nullptr;
+ }
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ if (buff_data != nullptr)
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ }
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ return mio::circle::builtin_code_neutral(opcode);
+}
+
+std::string Reader::opcode_name(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!mio::circle::is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return mio::circle::opcode_name(opcode);
+}
+
+std::vector<int32_t> Reader::outputs(const ::circle::Operator *op) const
+{
+ return as_index_vector(op->outputs());
+}
+
+std::string Reader::tensor_name(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_type(tensor);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _subgraph_index = sgindex;
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ auto name = subgraph->name();
+ _subgraph_name = name ? name->c_str() : "(noname)";
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+ _data_format = subgraph->data_format();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/src/Reader.test.cpp b/compiler/mio-circle04/src/Reader.test.cpp
new file mode 100644
index 000000000..104454a62
--- /dev/null
+++ b/compiler/mio-circle04/src/Reader.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+class mio_circle04_reader_test : public ::testing::Test
+{
+protected:
+ void initialization_emty(void)
+ {
+ _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec);
+ circle::FinishModelBuffer(_fbb, _model);
+ }
+
+ const circle::Model *circleModel(void)
+ {
+ auto ptr = _fbb.GetBufferPointer();
+ return circle::GetModel(ptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ flatbuffers::Offset<circle::Model> _model;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle04_reader_test, null_Model_NEG)
+{
+ EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error);
+}
+
+TEST_F(mio_circle04_reader_test, empty_Model)
+{
+ initialization_emty();
+
+ const circle::Model *model = circleModel();
+ EXPECT_NE(nullptr, model);
+
+ mio::circle::Reader reader(model);
+
+ SUCCEED();
+}
+
+// TODO add more tests
diff --git a/compiler/mio-tflite/README.md b/compiler/mio-tflite/README.md
index 187b1a5c6..c717ab877 100644
--- a/compiler/mio-tflite/README.md
+++ b/compiler/mio-tflite/README.md
@@ -1,3 +1,5 @@
# mio-tflite
_mio-tflite_ provides a library to access TensorFlow lite model files
+
+NOTE: _mio-tflite_ is currently obsolete
diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md
index 970569b47..86d2998ed 100644
--- a/compiler/mio-tflite260/README.md
+++ b/compiler/mio-tflite260/README.md
@@ -1,3 +1,5 @@
# mio-tflite260
_mio-tflite260_ provides a library to access TensorFlow lite model files with V2.6.0.
+
+NOTE: _mio-tflite260_ is currently obsolete
diff --git a/compiler/mir/include/mir/Graph.h b/compiler/mir/include/mir/Graph.h
index bf94cfb14..37bfdb361 100644
--- a/compiler/mir/include/mir/Graph.h
+++ b/compiler/mir/include/mir/Graph.h
@@ -103,6 +103,10 @@ private:
/**
* @brief Returns nodes of the graph sorted topologically.
+ * @note Sorting order priority
+ * 1) Graph input node (input index order)
+ * 2) Constant node (unordered - cannot predict order)
+ * 3) Ready node (unordered - cannot predict order)
*/
std::vector<Operation *> getSortedNodes(Graph *graph);
diff --git a/compiler/mir/src/Graph.cpp b/compiler/mir/src/Graph.cpp
index 04b005de4..05d6dc9bd 100644
--- a/compiler/mir/src/Graph.cpp
+++ b/compiler/mir/src/Graph.cpp
@@ -44,9 +44,16 @@ std::vector<Operation *> getSortedNodes(Graph *graph)
std::deque<Operation *> ready_nodes;
std::unordered_map<Operation *, std::size_t> num_visited_input_edges;
+ // Use input vector first to maintain correct input order
+ for (Operation *op : graph->getInputs())
+ {
+ ready_nodes.push_back(op);
+ }
+
for (Operation *op : graph->getNodes())
{
- if (op->getNumInputs() == 0)
+ // Skip already pushed input node
+ if ((op->getNumInputs() == 0) && (op->getType() != Operation::Type::input))
{
ready_nodes.push_back(op);
}
diff --git a/compiler/mir2loco/src/mir2loco.test.cpp b/compiler/mir2loco/src/mir2loco.test.cpp
index 92ab99488..244c92aa8 100644
--- a/compiler/mir2loco/src/mir2loco.test.cpp
+++ b/compiler/mir2loco/src/mir2loco.test.cpp
@@ -383,28 +383,49 @@ TEST_F(TestTransformer_mir2loco, Conv2D_Test)
auto loco_graph = transformer.transform(&mir_graph);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
- loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
- loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
- loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3));
- loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4));
- loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
- loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6));
-
ASSERT_NE(pull_node, nullptr);
+
+ // ConstGen: Only one ConstGen node
+ // We can convince that this node is input of FilterEncode because this is only ConstGen node
+ loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
ASSERT_NE(const_node, nullptr);
- ASSERT_NE(filter_node, nullptr);
+
+ // FeatureEncode
+ auto pull_uses = loco::succs(pull_node);
+ ASSERT_EQ(pull_uses.size(), 1);
+ loco::FeatureEncode *encode_node = dynamic_cast<loco::FeatureEncode *>(*pull_uses.begin());
ASSERT_NE(encode_node, nullptr);
- ASSERT_NE(conv_node, nullptr);
- ASSERT_NE(decode_node, nullptr);
- ASSERT_NE(push_node, nullptr);
ASSERT_EQ(encode_node->input(), pull_node);
- ASSERT_EQ(filter_node->input(), const_node);
+
+ // Conv2D
+ auto encode_uses = loco::succs(encode_node);
+ ASSERT_EQ(encode_uses.size(), 1);
+ loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(*encode_uses.begin());
+ ASSERT_NE(conv_node, nullptr);
ASSERT_EQ(conv_node->ifm(), encode_node);
+
+ // FilterEncode
+ auto const_uses = loco::succs(const_node);
+ ASSERT_EQ(const_uses.size(), 1);
+ loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(*const_uses.begin());
+ ASSERT_NE(filter_node, nullptr);
+ ASSERT_EQ(filter_node->input(), const_node);
ASSERT_EQ(conv_node->ker(), filter_node);
+
+ // FeatureDecode
+ auto conv_uses = loco::succs(conv_node);
+ ASSERT_EQ(conv_uses.size(), 1);
+ loco::FeatureDecode *decode_node = dynamic_cast<loco::FeatureDecode *>(*conv_uses.begin());
+ ASSERT_NE(decode_node, nullptr);
ASSERT_EQ(decode_node->input(), conv_node);
+
+ // Push
+ auto decode_uses = loco::succs(decode_node);
+ ASSERT_EQ(decode_uses.size(), 1);
+ loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin());
+ ASSERT_NE(push_node, nullptr);
ASSERT_EQ(push_node->from(), decode_node);
+
// Check params
ASSERT_EQ(conv_node->pad()->top(), 5);
ASSERT_EQ(conv_node->pad()->left(), 9);
diff --git a/compiler/moco/import/src/Importer.cpp b/compiler/moco/import/src/Importer.cpp
index 333f0f6a9..0659fd165 100644
--- a/compiler/moco/import/src/Importer.cpp
+++ b/compiler/moco/import/src/Importer.cpp
@@ -190,7 +190,7 @@ std::unique_ptr<loco::Graph> Importer::import(const ModelSignature &signature,
convert_graph(*source_ptr, signature, tf_graph_def, graph.get());
- return std::move(graph);
+ return graph;
}
} // namespace moco
diff --git a/compiler/moco/lang/src/IR/TFNode.cpp b/compiler/moco/lang/src/IR/TFNode.cpp
index 55c0e0c64..b59a505b5 100644
--- a/compiler/moco/lang/src/IR/TFNode.cpp
+++ b/compiler/moco/lang/src/IR/TFNode.cpp
@@ -17,6 +17,7 @@
#include "moco/IR/TFNode.h"
#include "moco/IR/TFDialect.h"
+#include <limits>
#include <memory>
#include <cassert>
diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt
index 8732340ae..90e989a00 100644
--- a/compiler/one-cmds/CMakeLists.txt
+++ b/compiler/one-cmds/CMakeLists.txt
@@ -8,7 +8,9 @@ set(ONE_COMMAND_FILES
one-optimize
one-quantize
one-pack
+ one-partition
one-profile
+ one-infer
one-codegen
one-prepare-venv
onecc
@@ -74,7 +76,11 @@ endforeach(ONE_UTILITY)
# make python directory
set(ONE_PYTHON_FILES constant.py
- make_cmd.py)
+ make_cmd.py
+ CfgRunner.py
+ OptionBuilder.py
+ TopologicalSortHelper.py
+ WorkflowRunner.py)
foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
diff --git a/compiler/one-cmds/dummy-driver/CMakeLists.txt b/compiler/one-cmds/dummy-driver/CMakeLists.txt
index 690a60776..2552a02db 100644
--- a/compiler/one-cmds/dummy-driver/CMakeLists.txt
+++ b/compiler/one-cmds/dummy-driver/CMakeLists.txt
@@ -1,16 +1,25 @@
# dummy driver for interface test
set(DUMMY_DRIVER_SRC src/dummy-compile.cpp)
set(HELP_DRIVER_SRC src/help-compile.cpp)
+set(DUMMY_INFER_SRC src/dummy-infer.cpp)
+set(DUMMY_INFER_V2_SRC src/dummy-inferV2.cpp)
+set(HELP_INFER_SRC src/help-infer.cpp)
set(DUMMY_PROFILE_SRC src/dummy-profile.cpp)
set(HELP_PROFILE_SRC src/help-profile.cpp)
add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
add_executable(help-compile ${HELP_DRIVER_SRC})
+add_executable(dummy-infer ${DUMMY_INFER_SRC})
+add_executable(dummy-inferV2 ${DUMMY_INFER_V2_SRC})
+add_executable(help-infer ${HELP_INFER_SRC})
add_executable(dummy-profile ${DUMMY_PROFILE_SRC})
add_executable(help-profile ${HELP_PROFILE_SRC})
set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
+set(DUMMY_INFER "${CMAKE_CURRENT_BINARY_DIR}/dummy-infer")
+set(DUMMY_INFER_V2 "${CMAKE_CURRENT_BINARY_DIR}/dummy-inferV2")
+set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer")
set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile")
set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile")
@@ -26,6 +35,24 @@ install(FILES ${HELP_DRIVER}
WORLD_READ WORLD_EXECUTE
DESTINATION test)
+install(FILES ${DUMMY_INFER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_INFER_V2}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_INFER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
install(FILES ${DUMMY_PROFILE}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_READ GROUP_EXECUTE
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp
new file mode 100644
index 000000000..60f5faefa
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-infer only tests its interface rather than its functionality.
+ *
+ * ./dummy-infer ${INPUT_NAME}
+ * dummy-infer dummy output!!!
+ */
+
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "dummy-infer dummy output!!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp
new file mode 100644
index 000000000..4b93c70a3
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-infer only tests its interface rather than its functionality.
+ *
+ * ./dummy-infer ${INPUT_NAME}
+ * Do inference of ${INPUT_NAME}
+ */
+
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "Do inference of " + std::string(argv[1]) << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/help-infer.cpp b/compiler/one-cmds/dummy-driver/src/help-infer.cpp
new file mode 100644
index 000000000..821d368d4
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/help-infer.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-infer prints dummy help message.
+ *
+ * $ ./help-infer -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index ebc165167..2352bbd7a 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -153,6 +153,7 @@ Current transformation options are
- expand_broadcast_const : This will expand broadcastable constant node inputs
- fold_add_v2 : This removes AddV2 operation which can be folded
- fold_cast : This removes Cast operation which can be folded
+- fold_densify : This removes Densify operator which can be folded
- fold_dequantize : This removes Dequantize operation which can be folded
- fold_dwconv : This folds Depthwise Convolution operation which can be folded
- fold_gather : This removes Gather operation which can be folded
@@ -205,10 +206,6 @@ Current transformation options are
- transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator.
- transform_min_relu_to_relu6: This will transform Minimum(6)-Relu pattern to Relu6 operator.
-There are options to enable multiple options at once for convenience.
-- O1: fuse_bcq, fuse_instnorm, resolve_customop_add, resolve_customop_batchmatmul,
- resolve_customop_matmul, remove_redundant_transpose, substitute_pack_to_reshape
-
one-quantize
------------
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build
index 5c313b44b..4b1f98070 100644
--- a/compiler/one-cmds/one-build
+++ b/compiler/one-cmds/one-build
@@ -22,7 +22,6 @@
import argparse
import configparser
import os
-import subprocess
import sys
import utils as _utils
@@ -83,6 +82,7 @@ def _get_driver_name(driver_name):
'one-import-onnx': 'one-import-onnx',
'one-optimize': 'one-optimize',
'one-quantize': 'one-quantize',
+ 'one-partition': 'one-partition',
'one-pack': 'one-pack',
'one-codegen': 'one-codegen'
}[driver_name]
@@ -157,7 +157,8 @@ def main():
bin_dir = os.path.dirname(os.path.realpath(__file__))
import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
transform_drivers = [
- 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
+ 'one-partition'
]
_verify_cfg(import_drivers_dict, config)
diff --git a/compiler/one-cmds/one-build.template.cfg b/compiler/one-cmds/one-build.template.cfg
index e147896ef..42960811e 100644
--- a/compiler/one-cmds/one-build.template.cfg
+++ b/compiler/one-cmds/one-build.template.cfg
@@ -5,6 +5,7 @@ one-import-bcq=False
one-import-onnx=False
one-optimize=True
one-quantize=False
+one-partition=False
one-pack=True
one-codegen=False
diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen
index 726538d44..86e1632e6 100644
--- a/compiler/one-cmds/one-codegen
+++ b/compiler/one-cmds/one-codegen
@@ -25,9 +25,7 @@ import glob
import itertools
import ntpath
import os
-import subprocess
import sys
-import tempfile
import shutil
import utils as _utils
diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq
index ef89a9297..c3ef0b275 100644
--- a/compiler/one-cmds/one-import-bcq
+++ b/compiler/one-cmds/one-import-bcq
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import tempfile
@@ -160,9 +159,9 @@ def _convert(args):
tmpdir,
os.path.splitext(
os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite'
- tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
- generate_bcq_metadata_output_path,
- tf2tfliteV2_output_path)
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, generate_bcq_metadata_output_path,
+ tf2tfliteV2_output_path)
try:
output_arrays_idx = tf2tfliteV2_cmd.index('--output_arrays')
tf2tfliteV2_cmd[output_arrays_idx + 1] = ','.join(bcq_output_arrays)
@@ -177,8 +176,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx
index eaa136197..ad19c2f59 100644
--- a/compiler/one-cmds/one-import-onnx
+++ b/compiler/one-cmds/one-import-onnx
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import tempfile
import onnx
@@ -80,6 +79,12 @@ def _get_parser():
parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
parser.add_argument(
'--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+ parser.add_argument(
+ '--keep_io_order',
+ action='store_true',
+ help=
+ 'Ensure generated circle model preserves the I/O order of the original onnx model.'
+ )
# save intermediate file(s)
parser.add_argument(
@@ -87,6 +92,12 @@ def _get_parser():
action='store_true',
help='Save intermediate files to output folder')
+ # experimental options
+ parser.add_argument(
+ '--experimental_disable_batchmatmul_unfold',
+ action='store_true',
+ help='Experimental disable BatchMatMul unfold')
+
return parser
@@ -124,6 +135,65 @@ def _apply_verbosity(verbosity):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+# The index of input/output is added in front of the name. For example,
+# Original input names: 'a', 'c', 'b'
+# Renamed: '0001_a', '0002_c', '0003_b'
+# This will preserve I/O order after import.
+def _remap_io_names(onnx_model):
+ # gather existing name of I/O and generate new name of I/O in sort order
+ input_nodes = []
+ output_nodes = []
+ remap_inputs = []
+ remap_outputs = []
+ initializers = []
+ # some models may have initializers as inputs. ignore them.
+ for initializer in onnx_model.graph.initializer:
+ initializers.append(initializer.name)
+ for idx in range(0, len(onnx_model.graph.input)):
+ name = onnx_model.graph.input[idx].name
+ if not name in initializers:
+ input_nodes.append(name)
+ remap_inputs.append(format(idx + 1, '04d') + '_' + name)
+ for idx in range(0, len(onnx_model.graph.output)):
+ name = onnx_model.graph.output[idx].name
+ output_nodes.append(name)
+ remap_outputs.append(format(idx + 1, '04d') + '_' + name)
+ # change names for graph input
+ for i in range(len(onnx_model.graph.input)):
+ if onnx_model.graph.input[i].name in input_nodes:
+ to_rename = onnx_model.graph.input[i].name
+ idx = input_nodes.index(to_rename)
+ onnx_model.graph.input[i].name = remap_inputs[idx]
+ # change names of all nodes in the graph
+ for i in range(len(onnx_model.graph.node)):
+ # check node.input is to change to remap_inputs or remap_outputs
+ for j in range(len(onnx_model.graph.node[i].input)):
+ if onnx_model.graph.node[i].input[j] in input_nodes:
+ to_rename = onnx_model.graph.node[i].input[j]
+ idx = input_nodes.index(to_rename)
+ onnx_model.graph.node[i].input[j] = remap_inputs[idx]
+ if onnx_model.graph.node[i].input[j] in output_nodes:
+ to_rename = onnx_model.graph.node[i].input[j]
+ idx = output_nodes.index(to_rename)
+ onnx_model.graph.node[i].input[j] = remap_outputs[idx]
+ # check node.output is to change to remap_inputs or remap_outputs
+ for j in range(len(onnx_model.graph.node[i].output)):
+ if onnx_model.graph.node[i].output[j] in output_nodes:
+ to_rename = onnx_model.graph.node[i].output[j]
+ idx = output_nodes.index(to_rename)
+ onnx_model.graph.node[i].output[j] = remap_outputs[idx]
+ if onnx_model.graph.node[i].output[j] in input_nodes:
+ to_rename = onnx_model.graph.node[i].output[j]
+ idx = input_nodes.index(to_rename)
+ onnx_model.graph.node[i].output[j] = remap_inputs[idx]
+ # change names for graph output
+ for i in range(len(onnx_model.graph.output)):
+ if onnx_model.graph.output[i].name in output_nodes:
+ to_rename = onnx_model.graph.output[i].name
+ idx = output_nodes.index(to_rename)
+ onnx_model.graph.output[i].name = remap_outputs[idx]
+
+
def _convert(args):
_apply_verbosity(args.verbose)
@@ -142,6 +212,13 @@ def _convert(args):
options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
onnx_legalizer.legalize(onnx_model, options)
+ if _utils._is_valid_attr(args, 'keep_io_order'):
+ _remap_io_names(onnx_model)
+ if _utils._is_valid_attr(args, 'save_intermediate'):
+ basename = os.path.basename(getattr(args, 'input_path'))
+ fixed_path = os.path.join(tmpdir,
+ os.path.splitext(basename)[0] + '~.onnx')
+ onnx.save(onnx_model, fixed_path)
tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
savedmodel_name = os.path.splitext(os.path.basename(
@@ -166,8 +243,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch
index dbf1ba6d7..7f39e61bb 100644
--- a/compiler/one-cmds/one-import-pytorch
+++ b/compiler/one-cmds/one-import-pytorch
@@ -80,7 +80,8 @@ def _get_parser():
tf2tflite_group.add_argument('--converter_version', default='v2')
parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
- parser.add_argument('--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+ parser.add_argument(
+ '--unroll_lstm', action='store_true', help='Unroll LSTM operators')
# save intermediate file(s)
parser.add_argument(
@@ -338,8 +339,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf
index 999255a34..6623fa6a4 100644
--- a/compiler/one-cmds/one-import-tf
+++ b/compiler/one-cmds/one-import-tf
@@ -21,8 +21,6 @@
import argparse
import os
-import subprocess
-import sys
import tempfile
import onelib.make_cmd as _make_cmd
@@ -152,8 +150,8 @@ def _convert(args):
tmpdir,
os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite'
tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
- getattr(args, 'input_path'),
- tf2tfliteV2_output_path)
+ getattr(args, 'input_path'),
+ tf2tfliteV2_output_path)
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
@@ -163,8 +161,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite
index 2d756bff6..3d96b117f 100644
--- a/compiler/one-cmds/one-import-tflite
+++ b/compiler/one-cmds/one-import-tflite
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import onelib.make_cmd as _make_cmd
@@ -83,8 +82,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- getattr(args, 'input_path'),
- getattr(args, 'output_path'))
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-infer b/compiler/one-cmds/one-infer
new file mode 100644
index 000000000..c7fcd8afd
--- /dev/null
+++ b/compiler/one-cmds/one-infer
@@ -0,0 +1,224 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+
+import utils as _utils
+
+# TODO Find better way to suppress trackback on error
+sys.tracebacklimit = 0
+
+
+def _get_backends_list():
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ The list where `one-infer` finds its backends
+ - `bin` folder where `one-infer` exists
+ - `backends` folder
+
+ NOTE If there are backends of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ backend_set = set()
+
+ # bin folder
+ files = [f for f in glob.glob(dir_path + '/*-infer')]
+ # backends folder
+ files += [f for f in glob.glob(dir_path + '/../backends/**/*-infer', recursive=True)]
+ # TODO find backends in `$PATH`
+
+ backends_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if (not base in backend_set) and os.path.isfile(cand) and os.access(
+ cand, os.X_OK):
+ backend_set.add(base)
+ backends_list.append(cand)
+
+ return backends_list
+
+
+def _search_backend_driver(driver):
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ The list where `one-infer` finds its backend driver
+ - `bin` folder where `one-infer` exists
+ - `backends/**/bin/` folder
+
+ NOTE If there are drivers of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+
+ # CASE 1: one/bin/{driver} is found
+ driver_path = dir_path + '/' + driver
+ if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK):
+ return driver_path
+
+ # CASE 2: one/backends/**/bin/{driver} is found
+ for driver_path in glob.glob(
+ dir_path + '/../backends/**/bin/' + driver, recursive=True):
+ if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK):
+ return driver_path
+
+ # CASE 3: {driver} is found in nowhere
+ return None
+
+
+def _get_parser(backends_list):
+ infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER | -b BACKEND] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+ parser = argparse.ArgumentParser(
+ description='command line tool to infer model', usage=infer_usage)
+
+ _utils._add_default_arg(parser)
+
+ # TODO: add tflite/onnx-infer driver to helper message when it is implemented
+ driver_help_message = 'backend inference driver name to execute'
+ parser.add_argument('-d', '--driver', type=str, help=driver_help_message)
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There is no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ post_process_help_message = 'post processing script to convert I/O data to standard format'
+ parser.add_argument('--post-process', type=str, help=post_process_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # `-d/--driver` and `-b/--backend` are mutually exclusive arguments.
+ if _utils._is_valid_attr(args, 'driver') and _utils._is_valid_attr(args, 'backend'):
+ parser.error(
+ '-d and -b options are mutually exclusive. Please use only one of them')
+
+ missing = []
+ if not _utils._is_valid_attr(args, 'driver') and not _utils._is_valid_attr(
+ args, 'backend'):
+ missing.append('{-d/--driver | -b/--backend}')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ infer_args = []
+ backend_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+
+ # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [-b BACKEND] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
+ if len(args):
+ infer_args = args[0]
+ infer_args = parser.parse_args(infer_args)
+ backend_args = backend_args if len(args) < 2 else args[1]
+ # print version
+ if len(args) and infer_args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return infer_args, backend_args
+
+
+def _get_executable(args, backends_list):
+ driver = _utils._is_valid_attr(args, 'driver')
+ if driver:
+ executable = _search_backend_driver(driver)
+ if executable:
+ return executable
+ else:
+ raise FileNotFoundError(driver + ' not found')
+
+ if _utils._is_valid_attr(args, 'backend'):
+ backend_base = getattr(args, 'backend') + '-infer'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ return cand
+ raise FileNotFoundError(backend_base + ' not found')
+
+
+def main():
+ # get backend list
+ backends_list = _get_backends_list()
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-infer')
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # make a command to run given backend driver
+ driver_path = _get_executable(args, backends_list)
+ infer_cmd = [driver_path] + backend_args
+ if _utils._is_valid_attr(args, 'command'):
+ infer_cmd += getattr(args, 'command').split()
+
+ # run backend driver
+ _utils._run(infer_cmd, err_prefix=ntpath.basename(driver_path))
+
+ # run post process script if it's given
+ if _utils._is_valid_attr(args, 'post_process'):
+ # NOTE: the given python script will be executed by venv of ONE
+ python_path = sys.executable
+ post_process_command = [python_path] + getattr(args,
+ 'post_process').strip().split(' ')
+ _utils._run(post_process_command, err_prefix='one-infer')
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-init b/compiler/one-cmds/one-init
new file mode 100644
index 000000000..04c4534cd
--- /dev/null
+++ b/compiler/one-cmds/one-init
@@ -0,0 +1,280 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+
+import configparser
+import utils as _utils
+
+# TODO Find better way to suppress trackback on error
+sys.tracebacklimit = 0
+
+
+class CommentableConfigParser(configparser.ConfigParser):
+ """
+ ConfigParser where comment can be stored
+ In Python ConfigParser, comment in ini file ( starting with ';') is considered a key of which
+ value is None.
+ Ref: https://stackoverflow.com/questions/6620637/writing-comments-to-files-with-configparser
+ """
+
+ def __init__(self):
+ # allow_no_value=True to add comment
+ # ref: https://stackoverflow.com/a/19432072
+ configparser.ConfigParser.__init__(self, allow_no_value=True)
+ self.optionxform = str
+
+ def add_comment(self, section, comment):
+ comment_sign = ';'
+ self[section][f'{comment_sign} {comment}'] = None
+
+
+def _get_backends_list():
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ The list where `one-init` finds its backends
+ - `bin` folder where `one-init` exists
+ - `backends` folder
+
+ NOTE If there are backends of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ backend_set = set()
+
+ # bin folder
+ files = [f for f in glob.glob(dir_path + '/*-init')]
+ # backends folder
+ files += [f for f in glob.glob(dir_path + '/../backends/**/*-init', recursive=True)]
+ # TODO find backends in `$PATH`
+
+ backends_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if (not base in backend_set) and os.path.isfile(cand) and os.access(
+ cand, os.X_OK):
+ backend_set.add(base)
+ backends_list.append(cand)
+
+ return backends_list
+
+
+# TODO Add support for TF graphdef and bcq
+def _get_parser(backends_list):
+ init_usage = (
+ 'one-init [-h] [-v] [-V] '
+ '[-i INPUT_PATH] '
+ '[-o OUTPUT_PATH] '
+ '[-m MODEL_TYPE] '
+ '[-b BACKEND] '
+ # args for onnx model
+ '[--convert_nchw_to_nhwc] '
+ '[--nchw_to_nhwc_input_shape] '
+ '[--nchw_to_nhwc_output_shape] '
+ # args for backend driver
+ '[--] [COMMANDS FOR BACKEND DRIVER]')
+ """
+ NOTE
+ layout options for onnx model could be difficult to users.
+ In one-init, we could consider easier args for the the above three:
+ For example, we could have another option, e.g., --input_img_layout LAYOUT
+ - When LAYOUT is NHWC, apply 'nchw_to_nhwc_input_shape=True' into cfg
+ - When LAYOUT is NCHW, apply 'nchw_to_nhwc_input_shape=False' into cfg
+ """
+
+ parser = argparse.ArgumentParser(
+ description='Command line tool to generate initial cfg file. '
+ 'Currently tflite and onnx models are supported',
+ usage=init_usage)
+
+ _utils._add_default_arg_no_CS(parser)
+
+ parser.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input model file')
+ parser.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output cfg file')
+ parser.add_argument(
+ '-m',
+ '--model_type',
+ type=str,
+ help=('type of input model: "onnx", "tflite". '
+ 'If the file extension passed to --input_path is '
+ '".tflite" or ".onnx", this arg can be omitted.'))
+
+ onnx_group = parser.add_argument_group('arguments when model type is onnx')
+ onnx_group.add_argument(
+ '--convert_nchw_to_nhwc',
+ action='store_true',
+ help=
+ 'Convert NCHW operators to NHWC under the assumption that input model is NCHW.')
+ onnx_group.add_argument(
+ '--nchw_to_nhwc_input_shape',
+ action='store_true',
+ help='Convert the input shape of the model (argument for convert_nchw_to_nhwc)')
+ onnx_group.add_argument(
+ '--nchw_to_nhwc_output_shape',
+ action='store_true',
+ help='Convert the output shape of the model (argument for convert_nchw_to_nhwc)')
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There is no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not _utils._is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if not _utils._is_valid_attr(args, 'backend'):
+ missing.append('-b/--backend')
+
+ if _utils._is_valid_attr(args, 'model_type'):
+ # TODO Support model types other than onnx and tflite (e.g., TF)
+ if getattr(args, 'model_type') not in ['onnx', 'tflite']:
+ parser.error('Allowed value for --model_type: "onnx" or "tflite"')
+
+ if _utils._is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
+ if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ missing.append('--convert_nchw_to_nhwc')
+ if _utils._is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
+ if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ missing.append('--convert_nchw_to_nhwc')
+
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ init_args = []
+ backend_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+
+ # one-init [-h] [-v] ...
+ if len(args):
+ init_args = args[0]
+ init_args = parser.parse_args(init_args)
+ backend_args = backend_args if len(args) < 2 else args[1]
+ # print version
+ if len(args) and init_args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return init_args, backend_args
+
+
+def _get_executable(args, backends_list):
+ if _utils._is_valid_attr(args, 'backend'):
+ backend_base = getattr(args, 'backend') + '-init'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ return cand
+ raise FileNotFoundError(backend_base + ' not found')
+
+
+# TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354)
+def _generate():
+ # generate cfg file
+ config = CommentableConfigParser()
+
+ def _add_onecc_sections():
+ pass # NYI
+
+ def _gen_import():
+ pass # NYI
+
+ def _gen_optimize():
+ pass # NYI
+
+ def _gen_quantize():
+ pass # NYI
+
+ def _gen_codegen():
+ pass # NYI
+
+ #
+ # NYI: one-profile, one-partition, one-pack, one-infer
+ #
+
+ _add_onecc_sections()
+
+ _gen_import()
+ _gen_optimize()
+ _gen_quantize()
+ _gen_codegen()
+
+ with open(args.output_path, 'w') as f:
+ config.write(f)
+
+
+def main():
+ # get backend list
+ backends_list = _get_backends_list()
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args = _parse_arg(parser)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # make a command to run given backend driver
+ driver_path = _get_executable(args, backends_list)
+ init_cmd = [driver_path] + backend_args
+
+ # run backend driver
+ _utils._run(init_cmd, err_prefix=ntpath.basename(driver_path))
+
+ #TODO generate cfg file
+
+ raise NotImplementedError("NYI")
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize
index 8b1f3f7be..481fc8459 100644
--- a/compiler/one-cmds/one-optimize
+++ b/compiler/one-cmds/one-optimize
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import onelib.constant as _constant
@@ -83,6 +82,14 @@ def _verify_arg(parser, args):
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
+ # default has pre-defined optimization options
+ default = _get_parser().parse_args()
+
+ # check if unrecognized arguments are given
+ diff = set(dir(args)) - set(dir(default))
+ if len(diff):
+ parser.error('the following arguments are unrecognized: ' + ' '.join(diff))
+
def _parse_arg(parser):
args = parser.parse_args()
@@ -102,8 +109,8 @@ def _optimize(args):
# make a command to optimize circle model
circle2circle_path = os.path.join(dir_path, 'circle2circle')
circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path,
- getattr(args, 'input_path'),
- getattr(args, 'output_path'))
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
# verbose
if _utils._is_valid_attr(args, 'verbose'):
diff --git a/compiler/one-cmds/one-pack b/compiler/one-cmds/one-pack
index 133207de0..5cab7c737 100644
--- a/compiler/one-cmds/one-pack
+++ b/compiler/one-cmds/one-pack
@@ -21,9 +21,7 @@
import argparse
import os
-import subprocess
import sys
-import tempfile
import utils as _utils
diff --git a/compiler/one-cmds/one-partition b/compiler/one-cmds/one-partition
new file mode 100644
index 000000000..c0d71e5d9
--- /dev/null
+++ b/compiler/one-cmds/one-partition
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import os
+import sys
+
+import utils as _utils
+
+# TODO Find better way to suppress trackback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to partition circle model by multiple backends')
+
+ _utils._add_default_arg(parser)
+
+ parser.add_argument(
+ '--backends', type=str, help='backends in CSV to use for partitioning')
+ parser.add_argument('--default', type=str, help='default backend to assign')
+
+ parser.add_argument(
+ '--part_file', type=str, help='partition file which provides backend to assign')
+ parser.add_argument('--input_file', type=str, help='input circle model filename')
+ parser.add_argument(
+ '--work_path',
+ type=str,
+ help='work path of partition, input files exist and output files are produced')
+
+ return parser
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return args
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'part_file'):
+ missing.append('part_file')
+ if not _utils._is_valid_attr(args, 'input_file'):
+ missing.append('input_file')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+ return
+
+
+def _partition(args):
+ # get file path to log
+ bin_path = os.path.dirname(os.path.realpath(__file__))
+ cur_path = os.getcwd()
+ partition_path = os.path.join(cur_path, args.part_file)
+ logfile_path = partition_path + '.log'
+
+ with open(logfile_path, 'wb', buffering=0) as f:
+ # make a command to package circle model and metadata into nnpackage
+ circle_partitioner_path = os.path.join(bin_path, 'circle-partitioner')
+
+ cmd = [os.path.expanduser(circle_partitioner_path)]
+
+ if _utils._is_valid_attr(args, 'backends'):
+ cmd.append('--backends')
+ cmd.append(getattr(args, 'backends'))
+ if _utils._is_valid_attr(args, 'default'):
+ cmd.append('--default')
+ cmd.append(getattr(args, 'default'))
+ if _utils._is_valid_attr(args, 'work_path'):
+ cmd.append('--work_path')
+ cmd.append(getattr(args, 'work_path'))
+
+ cmd.append('--part_file')
+ cmd.append(args.part_file)
+ cmd.append('--input_file')
+ cmd.append(args.input_file)
+
+ f.write((' '.join(cmd) + '\n').encode())
+
+ # run circle-partitoner
+ _utils._run(cmd, err_prefix='circle-partitioner', logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-partition')
+
+ if _utils._is_valid_attr(args, 'config'):
+ config_path = getattr(args, 'config')
+ _utils._parse_cfg_and_overwrite(config_path, 'one-partition', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # do partition
+ _partition(args)
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index 0f75166a7..b435671f4 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -41,6 +41,7 @@ VER_ONNX_TF=1.10.0
# Install tensorflow
PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
PIP_TRUSTED_HOST+="--trusted-host files.pythonhost.org "
PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
@@ -62,7 +63,8 @@ else
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
fi
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability
+# TODO remove version fix, https://github.com/Samsung/ONE/issues/9240
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.16.0
# Install PyTorch and ONNX related
# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
@@ -72,6 +74,8 @@ TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL}
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
@@ -84,3 +88,7 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then
else
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
fi
+
+# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
+# TODO remove this when issue is resolved
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.20.1
diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile
index ed6d8bd7a..b19c215ed 100644
--- a/compiler/one-cmds/one-profile
+++ b/compiler/one-cmds/one-profile
@@ -25,9 +25,7 @@ import glob
import itertools
import ntpath
import os
-import subprocess
import sys
-import tempfile
import utils as _utils
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index f2eff24bd..9282007d8 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -21,11 +21,12 @@
import argparse
import os
-import subprocess
import sys
import tempfile
+import json
import utils as _utils
+from utils import Command
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
@@ -67,6 +68,12 @@ def _get_parser():
action='store_true',
help='generate profiling data')
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
## arguments for quantization
quantization_group = parser.add_argument_group('arguments for quantization')
@@ -93,13 +100,13 @@ def _get_parser():
'--input_type',
type=str,
help=
- 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+ 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
)
quantization_group.add_argument(
'--output_type',
type=str,
help=
- 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+ 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
)
quantization_group.add_argument(
'--min_percentile',
@@ -126,10 +133,50 @@ def _get_parser():
"Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
)
quantization_group.add_argument(
- '--quant_config',
- type=str,
+ '--quant_config', type=str, help="Path to the quantization configuration file.")
+ quantization_group.add_argument(
+ '--evaluate_result',
+ action='store_true',
+ help=
+ "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results."
+ )
+ quantization_group.add_argument(
+ '--test_data', type=str, help="Path to the test data used for evaluation.")
+ quantization_group.add_argument(
+ '--print_mae',
+ action='store_true',
+ help=
+ "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mape',
+ action='store_true',
+ help=
+ "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mpeir',
+ action='store_true',
+ help=
+ "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top1_match',
+ action='store_true',
+ help=
+ "Print Top-1 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top5_match',
+ action='store_true',
+ help=
+ "Print Top-5 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mse',
+ action='store_true',
help=
- "Path to the quantization configuration file."
+ "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model."
)
# arguments for force_quantparam option
@@ -162,6 +209,14 @@ def _get_parser():
copy_quantparam_group.add_argument(
'--dst_tensor_name', type=str, action='append', help='tensor name (string)')
+ # arguments for fake_quant option
+ fake_quant_group = parser.add_argument_group('arguments for fake_quantize option')
+
+ fake_quant_group.add_argument(
+ '--fake_quantize',
+ action='store_true',
+ help='convert quantized model to fake-quantized fp32 model.')
+
return parser
@@ -171,8 +226,29 @@ def _set_default_values(args):
setattr(args, 'input_model_dtype', 'float32')
if not _utils._is_valid_attr(args, 'quantized_dtype'):
setattr(args, 'quantized_dtype', 'uint8')
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # Get quantized_dtype from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_quantization_dtype' in qconf:
+ setattr(args, 'quantized_dtype',
+ qconf['default_quantization_dtype'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
if not _utils._is_valid_attr(args, 'granularity'):
setattr(args, 'granularity', 'layer')
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # Get granularity from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_granularity' in qconf:
+ setattr(args, 'granularity', qconf['default_granularity'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
if not _utils._is_valid_attr(args, 'mode'):
setattr(args, 'mode', 'percentile')
if not _utils._is_valid_attr(args, 'min_percentile'):
@@ -238,11 +314,18 @@ def _quantize(args):
_copy_qparam(args)
return
+ if _utils._is_valid_attr(args, 'fake_quantize'):
+ # fake-quantize model
+ _fake_quantize(args)
+ return
+
# get file path to log
dir_path = os.path.dirname(os.path.realpath(__file__))
logfile_path = os.path.realpath(args.output_path) + '.log'
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ if _utils._is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
# get driver path
circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
record_minmax_path = os.path.join(dir_path, 'record-minmax')
@@ -263,13 +346,19 @@ def _quantize(args):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
if _utils._is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # NOTE --config conflicts with --config option in onecc, so
+ # we use quant_config for one-quantize
+ circle_quantizer_cmd.append('--config')
+ circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
if _utils._is_valid_attr(args, 'input_path'):
circle_quantizer_cmd.append(getattr(args, 'input_path'))
- tmp_output_path_1 = os.path.join(
+ tmp_weights_fake_quant_path = os.path.join(
tmpdir,
- os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle'
- circle_quantizer_cmd.append(tmp_output_path_1)
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.weights_fake_quant.circle'
+ circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
# profiling
if _utils._is_valid_attr(args, 'generate_profile_data'):
circle_quantizer_cmd.append('--generate_profile_data')
@@ -279,45 +368,23 @@ def _quantize(args):
# run circle-quantizer
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
- ## make a command to record min-max value of each tensor while running the representative dataset
- circle_record_minmax_cmd = [record_minmax_path]
- # verbose
- if _utils._is_valid_attr(args, 'verbose'):
- circle_record_minmax_cmd.append('--verbose')
- # input and output path
- circle_record_minmax_cmd.append('--input_model')
- circle_record_minmax_cmd.append(tmp_output_path_1)
- tmp_output_path_2 = os.path.join(
+ tmp_minmax_recorded_path = os.path.join(
tmpdir,
- os.path.splitext(os.path.basename(args.input_path))[0]) + '2.circle'
- circle_record_minmax_cmd.append('--output_model')
- circle_record_minmax_cmd.append(tmp_output_path_2)
- # input data
- if _utils._is_valid_attr(args, 'input_data'):
- circle_record_minmax_cmd.append('--input_data')
- circle_record_minmax_cmd.append(getattr(args, 'input_data'))
- if _utils._is_valid_attr(args, 'input_data_format'):
- circle_record_minmax_cmd.append('--input_data_format')
- circle_record_minmax_cmd.append(getattr(args, 'input_data_format'))
- # min and max percentile
- if _utils._is_valid_attr(args, 'min_percentile'):
- circle_record_minmax_cmd.append('--min_percentile')
- circle_record_minmax_cmd.append(getattr(args, 'min_percentile'))
- if _utils._is_valid_attr(args, 'max_percentile'):
- circle_record_minmax_cmd.append('--max_percentile')
- circle_record_minmax_cmd.append(getattr(args, 'max_percentile'))
- # mode
- if _utils._is_valid_attr(args, 'mode'):
- circle_record_minmax_cmd.append('--mode')
- circle_record_minmax_cmd.append(getattr(args, 'mode'))
- # profiling
- if _utils._is_valid_attr(args, 'generate_profile_data'):
- circle_record_minmax_cmd.append('--generate_profile_data')
-
- f.write((' '.join(circle_record_minmax_cmd) + '\n').encode())
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.minmax_recorded.circle'
- # run record-minmax
- _utils._run(circle_record_minmax_cmd, err_prefix="record_minmax", logfile=f)
+ ## make a command to record min-max value of each tensor while running the representative dataset
+ record_minmax_cmd = Command(record_minmax_path, args, f)
+ record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \
+ .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
+ .add_option_with_valid_args('--input_data', ['input_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
+ .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
+ .add_option_with_valid_args('--mode', ['mode']) \
+ .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
+ .run()
## make a second command to quantize the model using the embedded information
circle_quantizer_cmd = [circle_quantizer_path]
@@ -349,7 +416,7 @@ def _quantize(args):
circle_quantizer_cmd.append('--config')
circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
- circle_quantizer_cmd.append(tmp_output_path_2)
+ circle_quantizer_cmd.append(tmp_minmax_recorded_path)
if _utils._is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
# profiling
@@ -361,6 +428,38 @@ def _quantize(args):
# run circle-quantizer
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ # evaluate
+ if _utils._is_valid_attr(args, 'evaluate_result'):
+ circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
+ quant_model = ""
+ if _utils._is_valid_attr(args, 'output_path'):
+ quant_model = getattr(args, 'output_path')
+ tmp_fake_quant_model = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.fake_quant.circle'
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \
+ .run()
+
+ # compare fake-quant model and fp32 model
+ circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f)
+ circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \
+ .add_option_with_values('--second_model', [tmp_fake_quant_model]) \
+ .add_option_with_valid_args('--first_input_data', ['test_data']) \
+ .add_option_with_valid_args('--second_input_data', ['test_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \
+ .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \
+ .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \
+ .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \
+ .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \
+ .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \
+ .run()
+
def _write_qparam(args):
# get file path to log
@@ -433,6 +532,24 @@ def _copy_qparam(args):
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+def _fake_quantize(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ q_model = getattr(args, 'input_path')
+ fq_model = getattr(args, 'output_path')
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [q_model, fq_model]) \
+ .run()
+
+
def main():
# parse arguments
parser = _get_parser()
diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc
index 25682ff4b..a5ba636a2 100644
--- a/compiler/one-cmds/onecc
+++ b/compiler/one-cmds/onecc
@@ -25,6 +25,8 @@ import os
import subprocess
import sys
+from onelib.CfgRunner import CfgRunner
+from onelib.WorkflowRunner import WorkflowRunner
import utils as _utils
# TODO Find better way to suppress trackback on error
@@ -42,6 +44,7 @@ subtool_list = {
'backend': {
'codegen': 'Code generation tool',
'profile': 'Profile backend model file',
+ 'infer': 'Infer backend model file'
},
}
@@ -64,12 +67,25 @@ def _check_subtool_exists():
def _get_parser():
- onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [COMMAND <args>]'
+ onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [-W WORKFLOW] [-O OPTIMIZATION] [COMMAND <args>]'
onecc_desc = 'Run ONE driver via several commands or configuration file'
parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage)
_utils._add_default_arg(parser)
+ opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = ['-' + s for s in opt_name_list]
+ if not opt_name_list:
+ opt_help_message = '(No available optimization options)'
+ else:
+ opt_help_message = '(Available optimization options: ' + ', '.join(
+ opt_name_list) + ')'
+ opt_help_message = 'optimization name to use ' + opt_help_message
+ parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message)
+
+ parser.add_argument(
+ '-W', '--workflow', type=str, metavar='WORKFLOW', help='run with workflow file')
+
# just for help message
compile_group = parser.add_argument_group('compile to circle model')
for tool, desc in subtool_list['compile'].items():
@@ -98,45 +114,17 @@ def _parse_arg(parser):
def _verify_arg(parser, args):
"""verify given arguments"""
# check if required arguments is given
- if not _utils._is_valid_attr(args, 'config'):
- parser.error('-C/--config argument is required')
-
-
-def _get_driver_name(driver_name):
- return {
- 'one-optimize': 'one-optimize',
- 'one-quantize': 'one-quantize',
- 'one-pack': 'one-pack',
- 'one-codegen': 'one-codegen',
- 'one-profile': 'one-profile'
- }[driver_name]
-
-
-def _parse_cfg(args):
- config = configparser.ConfigParser()
- config.optionxform = str
- parsed = config.read(os.path.expanduser(getattr(args, 'config')))
- if not parsed:
- raise FileNotFoundError('Not found given configuration file')
- return config
-
-
-def _is_available_driver(config, driver_name):
- return config.has_option('onecc', driver_name) and config.getboolean(
- 'onecc', driver_name)
-
-
-def _verify_cfg(import_driver_list, config):
- if not config.has_section('onecc'):
- raise ImportError('[onecc] section is required in configuration file')
-
- import_driver_cnt = 0
- for d in import_driver_list:
- if _is_available_driver(config, d):
- import_driver_cnt += 1
-
- if import_driver_cnt > 1:
- raise AssertionError('Only one import-* driver can be executed')
+ if not _utils._is_valid_attr(args, 'config') and not _utils._is_valid_attr(
+ args, 'workflow'):
+ parser.error('-C/--config or -W/--workflow argument is required')
+ # check if given optimization option exists
+ opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
+ if _utils._is_valid_attr(args, 'O'):
+ if ' ' in getattr(args, 'O'):
+ parser.error('Not allowed to have space in the optimization name')
+ if not getattr(args, 'O') in opt_name_list:
+ parser.error('Invalid optimization option')
def main():
@@ -158,35 +146,16 @@ def main():
# verify arguments
_verify_arg(parser, args)
- # parse configuration file
- config = _parse_cfg(args)
-
- # verify configuration file
bin_dir = os.path.dirname(os.path.realpath(__file__))
- import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
- transform_drivers = [
- 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
- ]
- _verify_cfg(import_drivers_dict, config)
-
- # get sections to run
- section_to_run = []
- for d in list(import_drivers_dict) + transform_drivers:
- if _is_available_driver(config, d):
- section_to_run.append(d)
-
- # run
- dir_path = os.path.dirname(os.path.realpath(__file__))
- for section in section_to_run:
- if section in import_drivers_dict:
- # we already has driver name in dict
- driver_name = import_drivers_dict[section]
- else:
- driver_name = _get_driver_name(section)
- options = ['--config', getattr(args, 'config'), '--section', section]
- if _utils._is_valid_attr(args, 'verbose'):
- options.append('--verbose')
- _call_driver(driver_name, options)
+ if _utils._is_valid_attr(args, 'config'):
+ runner = CfgRunner(args.config)
+ runner.detect_import_drivers(bin_dir)
+ if _utils._is_valid_attr(args, 'O'):
+ runner.add_opt(getattr(args, 'O'))
+ runner.run(bin_dir)
+ elif _utils._is_valid_attr(args, 'workflow'):
+ runner = WorkflowRunner(args.workflow)
+ runner.run(bin_dir)
if __name__ == '__main__':
diff --git a/compiler/one-cmds/onecc.template.cfg b/compiler/one-cmds/onecc.template.cfg
index a23d1cea9..6f6a4e266 100644
--- a/compiler/one-cmds/onecc.template.cfg
+++ b/compiler/one-cmds/onecc.template.cfg
@@ -1,28 +1,144 @@
+; To activate a step (or task),
+; set True for the step in [onecc] section and fill options in the corresponding section
[onecc]
-one-import-tf=True
+; neural network model to circle
+one-import-tf=False
one-import-tflite=False
one-import-bcq=False
one-import-onnx=False
-one-optimize=True
+; circle to circle with optimization
+one-optimize=False
+; circle to circle with quantization
one-quantize=False
-one-pack=True
+; partition circle
+one-partition=False
+; package circle and metadata into nnpackage
+one-pack=False
+; generate code for backend
one-codegen=False
+; profile
one-profile=False
+; infer
+one-infer=False
[one-import-tf]
-input_path=/path/to/inception_v3.pb
-output_path=inception_v3.circle
-input_arrays=input
-input_shapes=1,299,299,3
-output_arrays=InceptionV3/Predictions/Reshape_1
-converter_version=v1
+# mandatory
+; pb file
+input_path=
+; circle file
+output_path=
+# optional
+; v1 or v2
+converter_version=v2
+; graph_def(default), saved_model or keras_model
model_format=graph_def
+# optional but mandatory for model_format=graph_def
+; input tensor names of the input arrays, comma-separated
+input_arrays=
+; output tensor names of the input arrays, comma-separated
+output_arrays=
+; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3)
+input_shapes=
+
+[one-import-tflite]
+# mandatory
+; tflite file
+input_path=
+; circle file
+output_path=
+
+[one-import-bcq]
+# mandatory
+; bcq file
+input_path=
+; circle file
+output_path=
+# optional
+; v1 or v2
+converter_version=v2
+; graph_def(default), saved_model or keras_model
+model_format=graph_def
+# optional but mandatory for model_format=graph_def
+; input tensor names of the input arrays, comma-separated
+input_arrays=
+; output tensor names of the input arrays, comma-separated
+output_arrays=
+; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3)
+input_shapes=
+
+[one-import-onnx]
+# mandatory
+; onnx file
+input_path=
+; circle file
+output_path=
+# optional
+; True or False
+unroll_rnn=
+; True or False
+unroll_lstm=
[one-optimize]
-input_path=inception_v3.circle
-output_path=inception_v3.opt.circle
-generate_profile_data=False
+# mandatory
+; circle file
+input_path=
+; circle file
+output_path=
+# //TODO: Add available options
+
+[one-quantize]
+# mandatory
+; circle file
+input_path=
+; circle file
+output_path=
+# optional arguments for quantization
+; input data file (if not given, random data will be used for calibration)
+input_data=
+; h5/hdf5(default), list/filelist, or dir/directory
+input_data_format=
+; dtype of quantized model (uint8(default), int16)
+quantized_dtype=
+; granularity of quantization (layer(default), channel)
+granularity=
+; dtype of model's input (uint8, int16, float32). Same with quantized_dtype by default.
+input_type=
+; dtype of model's output (uint8, int16, float32). Same with quantized_dtype by default.
+output_type=
+
+[one-partition]
+# mandatory
+; partition file which provides backend to assign
+part_file=
+; circle file
+input_file=
+# //TODO: Add available options
[one-pack]
-input_path=inception_v3.opt.circle
-output_path=inception_v3_pack
+# mandatory
+; input path
+input_path=
+; output path
+output_path=
+# //TODO: Add available options
+
+[one-codegen]
+# mandatory
+; backend name
+backend=
+; commands for each backend
+command=
+
+[one-profile]
+# mandatory
+; backend name
+backend=
+# //TODO: Add available options
+
+[one-infer]
+# mandatory (mutually exclusive)
+; backend name
+backend=
+; driver name
+driver=
+# //TODO: Add available options
diff --git a/compiler/one-cmds/onelib/CfgRunner.py b/compiler/one-cmds/onelib/CfgRunner.py
new file mode 100644
index 000000000..c66e5b4ba
--- /dev/null
+++ b/compiler/one-cmds/onelib/CfgRunner.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import configparser
+import os
+import warnings
+
+import utils as oneutils
+
+
+def _simple_warning(message, category, filename, lineno, file=None, line=None):
+ return f'{category.__name__}: {message}\n'
+
+
+class CfgRunner:
+ driver_sequence = [
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
+ 'one-partition', 'one-infer'
+ ]
+
+ def __init__(self, path):
+ self.path = path
+ self.optparser = None
+ self.cfgparser = configparser.ConfigParser()
+ # make option names case sensitive
+ self.cfgparser.optionxform = str
+ parsed = self.cfgparser.read(os.path.expanduser(path))
+ if not parsed:
+ raise FileNotFoundError('Not found given configuration file')
+
+ self._verify_cfg(self.cfgparser)
+ # default import drivers
+ self.import_drivers = [
+ 'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite'
+ ]
+
+ def _verify_cfg(self, cfgparser):
+ if not cfgparser.has_section('onecc'):
+ if cfgparser.has_section('one-build'):
+ warnings.formatwarning = _simple_warning
+ warnings.warn(
+ "[one-build] section will be deprecated. Please use [onecc] section.")
+ else:
+ raise ImportError('[onecc] section is required in configuration file')
+
+ def _is_available(self, driver):
+ # if there is no [onecc] section, fall back to the [one-build] section for backward compatibility
+ return (self.cfgparser.has_option('onecc', driver) and self.cfgparser.getboolean(
+ 'onecc', driver)) or (self.cfgparser.has_option('one-build', driver)
+ and self.cfgparser.getboolean('one-build', driver))
+
+ def add_opt(self, opt):
+ self.optparser = configparser.ConfigParser()
+ # make option names case sensitive
+ self.optparser.optionxform = str
+ opt_book = dict(
+ zip(oneutils._get_optimization_list(get_name=True),
+ oneutils._get_optimization_list()))
+ parsed = self.optparser.read(opt_book['O' + opt])
+ if not parsed:
+ raise FileNotFoundError('Not found given optimization configuration file')
+ if len(self.optparser.sections()) != 1 or self.optparser.sections(
+ )[0] != 'one-optimize':
+ raise AssertionError(
+ 'Optimization configuration file only allowed to have a \'one-optimize\' section'
+ )
+ self.opt = opt
+
+ def detect_import_drivers(self, dir):
+ self.import_drivers = list(oneutils._detect_one_import_drivers(dir).keys())
+
+ def run(self, working_dir, verbose=False):
+ section_to_run = []
+ for d in self.import_drivers + self.driver_sequence:
+ if self._is_available(d):
+ section_to_run.append(d)
+
+ for section in section_to_run:
+ options = ['--config', self.path, '--section', section]
+ if section == 'one-optimize' and self.optparser:
+ options += ['-O', self.opt]
+ if verbose:
+ options.append('--verbose')
+ driver_path = os.path.join(working_dir, section)
+ cmd = [driver_path] + options
+ oneutils._run(cmd)
diff --git a/compiler/one-cmds/onelib/OptionBuilder.py b/compiler/one-cmds/onelib/OptionBuilder.py
new file mode 100644
index 000000000..6a75783ad
--- /dev/null
+++ b/compiler/one-cmds/onelib/OptionBuilder.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from onelib.constant import CONSTANT
+
+
+class OptionBuilder:
+ def __init__(self, one_cmd_type):
+ self.type = one_cmd_type
+
+ def _build_default(self, commands):
+ options = []
+ for k, v in commands.items():
+ options.extend(['--' + k, v])
+ return options
+
+ def _build_with_unknown_command(self, commands):
+ COMMAND_K = 'command'
+ options = []
+ for k, v in commands.items():
+ if k == COMMAND_K:
+ continue
+ options.extend(['--' + k, v])
+ options.extend(['--'])
+ options.extend(commands[COMMAND_K].split())
+ return options
+
+ def _build_import(self, commands):
+ options = []
+ arg_0 = ['save_intermediate']
+ for k, v in commands.items():
+ if k in arg_0 and v == "True":
+ options.extend(['--' + k])
+ continue
+ options.extend(['--' + k, v])
+ return options
+
+ def _build_optimize(self, commands):
+ options = []
+ arg_0 = ['generate_profile_data']
+ arg_1 = ['input_path', 'output_path', 'change_outputs']
+ for k, v in commands.items():
+ if k in arg_1:
+ options.extend(['--' + k, v])
+ continue
+ if k in arg_0 and v == 'True':
+ options.extend(['--' + k])
+ continue
+ for opt in CONSTANT.OPTIMIZATION_OPTS:
+ if k == opt[0] and v == "True":
+ options.extend(['--' + k])
+ break
+ return options
+
+ def _build_quantize(self, commands):
+ options = []
+ arg_0 = [
+ 'generate_profile_data', 'save_intermediate', 'TF-style_maxpool',
+ 'evaluate_result', 'print_mae', 'print_mape', 'print_mpeir',
+ 'print_top1_match', 'print_top5_match', 'force_quantparam', 'copy_quantparam'
+ ]
+ for k, v in commands.items():
+ if k in arg_0 and v == "True":
+ options.extend(['--' + k])
+ continue
+ options.extend(['--' + k, v])
+ return options
+
+ def build(self, commands):
+ cmd_book = dict.fromkeys(
+ ['one-import-bcq', 'one-import-tflite', 'one-pack', 'one-partition'],
+ self._build_default)
+ cmd_book['one-codegen'] = self._build_with_unknown_command
+ cmd_book['one-import-onnx'] = self._build_import
+ cmd_book['one-import-pytorch'] = self._build_import
+ cmd_book['one-import-tf'] = self._build_import
+ cmd_book['one-infer'] = self._build_with_unknown_command
+ cmd_book['one-optimize'] = self._build_optimize
+ cmd_book['one-profile'] = self._build_with_unknown_command
+ cmd_book['one-quantize'] = self._build_quantize
+
+ return cmd_book[self.type](commands)
diff --git a/compiler/one-cmds/onelib/TopologicalSortHelper.py b/compiler/one-cmds/onelib/TopologicalSortHelper.py
new file mode 100644
index 000000000..d05adea8d
--- /dev/null
+++ b/compiler/one-cmds/onelib/TopologicalSortHelper.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+
+
+class TopologicalSortHelper:
+ def __init__(self, vertices):
+ self.graph = defaultdict(list)
+ self.vertices = vertices
+
+ def add_edge(self, u, v):
+ self.graph[u].append(v)
+
+ def sort_util(self, v, visited, stack):
+ visited[v] = True
+
+ for i in self.graph[v]:
+ if visited[i] == False:
+ self.sort_util(i, visited, stack)
+
+ stack.insert(0, v)
+
+ def sort(self):
+ visited = dict.fromkeys(self.vertices, False)
+ stack = []
+
+ for v in self.vertices:
+ if visited[v] == False:
+ self.sort_util(v, visited, stack)
+
+ return stack
diff --git a/compiler/one-cmds/onelib/WorkflowRunner.py b/compiler/one-cmds/onelib/WorkflowRunner.py
new file mode 100644
index 000000000..0482dd9da
--- /dev/null
+++ b/compiler/one-cmds/onelib/WorkflowRunner.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+from onelib.OptionBuilder import OptionBuilder
+from onelib.TopologicalSortHelper import TopologicalSortHelper
+from onelib.CfgRunner import CfgRunner
+import utils as oneutils
+
+
+class WorkflowRunner:
+ WORKFLOWS_K = 'workflows'
+ DEPENDENCIES_K = 'run-after'
+ CFG_REFERENCE_K = 'cfg-reference'
+ WORKFLOW_STEPS_K = 'steps'
+ ONE_CMD_TOOL_K = 'one-cmd'
+ COMMANDS_K = 'commands'
+
+ def __init__(self, path):
+ try:
+ with open(path) as f:
+ self.json_contents = json.load(f)
+ except FileNotFoundError:
+ raise FileNotFoundError("Not found given workflow file")
+ except json.decoder.JSONDecodeError:
+ raise ImportError("Invalid workflow file")
+
+ self._verify_workflow(self.json_contents)
+
+ workflows = self.json_contents[self.WORKFLOWS_K]
+ self.adj = dict.fromkeys(workflows, [])
+ # decide the order according to the dependencies of each workflow.
+ helper = TopologicalSortHelper(workflows)
+ for workflow_k in workflows:
+ workflow = self.json_contents[workflow_k]
+ if self.DEPENDENCIES_K in workflow:
+ for previous_workflow in workflow[self.DEPENDENCIES_K]:
+ helper.add_edge(previous_workflow, workflow_k)
+ self.adj[previous_workflow].append(workflow_k)
+ self.workflow_sequence = helper.sort()
+
+ self._check_cycle()
+
+ def _check_cycle(self):
+ pos = dict()
+ index = 0
+ workflow_num = len(self.workflow_sequence)
+ # number the order
+ for seq_idx in range(workflow_num):
+ pos[self.workflow_sequence[seq_idx]] = index
+ index += 1
+
+ for seq_idx in range(workflow_num):
+ first_wf = self.workflow_sequence[seq_idx]
+ for adj_wf in self.adj[first_wf]:
+ first_pos = 0 if first_wf not in pos else pos[first_wf]
+ second_pos = 0 if adj_wf not in pos else pos[adj_wf]
+ if (first_pos > second_pos):
+ raise RuntimeError("Workflows should not have a cycle")
+
+ def _verify_workflow(self, json_contents):
+ # workflow file should have WORKFLOWS_K
+ if not self.WORKFLOWS_K in json_contents:
+ raise ValueError("Not found \"" + self.WORKFLOWS_K +
+ "\" key in workflow file")
+
+ workflows = json_contents[self.WORKFLOWS_K]
+ # workflow file should have keys listed in WORKFLOWS_K
+ for workflow_k in workflows:
+ if not workflow_k in json_contents:
+ raise ValueError("Not found " + workflow_k + " key listed in \"" +
+ self.WORKFLOWS_K + "\"")
+
+ # each workflow should have either WORKFLOW_STEPS_K or CFG_REFERENCE_K
+ for workflow_k in workflows:
+ if not self.WORKFLOW_STEPS_K in json_contents[workflow_k] and not self.CFG_REFERENCE_K in json_contents[workflow_k]:
+ raise ValueError("Each workflow should have either \"" +
+ self.WORKFLOW_STEPS_K + "\" or \"" +
+ self.CFG_REFERENCE_K + "\"")
+ for workflow_k in workflows:
+ if self.WORKFLOW_STEPS_K in json_contents[workflow_k] and self.CFG_REFERENCE_K in json_contents[workflow_k]:
+ raise ValueError("\"" + self.WORKFLOW_STEPS_K + "\" and \"" +
+ self.CFG_REFERENCE_K + "\" are exclusive key")
+
+ # each step should have ONE_CMD_TOOL_K and COMMANDS_K
+ for workflow_k in workflows:
+ workflow = json_contents[workflow_k]
+ if self.WORKFLOW_STEPS_K in workflow:
+ step_keys = workflow[self.WORKFLOW_STEPS_K]
+ for step_k in step_keys:
+ step = workflow[step_k]
+ if not self.ONE_CMD_TOOL_K in step or not self.COMMANDS_K in step:
+ raise ValueError("Each step should have \"" +
+ self.ONE_CMD_TOOL_K + "\"" + " and \"" +
+ self.COMMANDS_K + "\"")
+
+ def run(self, working_dir, verbose=False):
+ # run workflows in sequence
+ for workflow_k in self.workflow_sequence:
+ workflow = self.json_contents[workflow_k]
+ if self.WORKFLOW_STEPS_K in workflow:
+ steps = workflow[self.WORKFLOW_STEPS_K]
+ for step_k in steps:
+ step = workflow[step_k]
+ commands = step[self.COMMANDS_K]
+ driver_name = step[self.ONE_CMD_TOOL_K]
+ option_builder = OptionBuilder(driver_name)
+ options = option_builder.build(commands)
+ # get the absolute path of the caller
+ driver_path = os.path.join(working_dir, driver_name)
+ cmd = [driver_path] + options
+ oneutils._run(cmd)
+ elif self.CFG_REFERENCE_K in workflow:
+ cfg_path = workflow[self.CFG_REFERENCE_K]['path']
+ runner = CfgRunner(cfg_path)
+ runner.run(working_dir, verbose)
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
index 7ddd7382d..7dd79b65d 100644
--- a/compiler/one-cmds/onelib/constant.py
+++ b/compiler/one-cmds/onelib/constant.py
@@ -14,11 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+
class CONSTANT:
__slots__ = () # This prevents access via __dict__.
OPTIMIZATION_OPTS = (
# (OPTION_NAME, HELP_MESSAGE)
- ('O1', 'enable O1 optimization pass'),
('convert_nchw_to_nhwc',
'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
),
@@ -29,6 +29,7 @@ class CONSTANT:
'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
('fold_add_v2', 'fold AddV2 op with constant inputs'),
('fold_cast', 'fold Cast op with constant input'),
+ ('fold_densify', 'fold Densify op with sparse constant input'),
('fold_dequantize', 'fold Dequantize op'),
('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
('fold_gather', 'fold Gather op'),
@@ -62,12 +63,16 @@ class CONSTANT:
('remove_unnecessary_slice', 'remove unnecessary slice ops'),
('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
('remove_unnecessary_split', 'remove unnecessary split ops'),
+ ('replace_non_const_fc_with_batch_matmul',
+ 'replace FullyConnected op with non-const weights to BatchMatMul op'),
+ ('replace_sub_with_add', 'replace Sub op with Add op'),
('resolve_customop_add', 'convert Custom(Add) op to Add op'),
('resolve_customop_batchmatmul',
'convert Custom(BatchMatmul) op to BatchMatmul op'),
('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
('resolve_customop_max_pool_with_argmax',
'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
+ ('resolve_customop_splitv', 'convert Custom(SplitV) op to SplitV op'),
('shuffle_weight_to_16x1float32',
'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
' Note that it only converts weights whose row is a multiple of 16'),
diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py
index d8380f28d..0015e8319 100644
--- a/compiler/one-cmds/onelib/make_cmd.py
+++ b/compiler/one-cmds/onelib/make_cmd.py
@@ -19,6 +19,7 @@ import sys
import onelib.constant as _constant
+
def _is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
@@ -64,6 +65,10 @@ def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
cmd.append('--output_arrays')
cmd.append(getattr(args, 'output_arrays'))
+ # experimental options
+ if _is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
+ cmd.append('--experimental_disable_batchmatmul_unfold')
+
return cmd
diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py
index 26c2b75b9..0141514b6 100755
--- a/compiler/one-cmds/onnx_legalizer.py
+++ b/compiler/one-cmds/onnx_legalizer.py
@@ -341,7 +341,8 @@ def _dtype_to_np(dtype):
raise NotImplementedError('unsupported data type')
-def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name):
+def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip,
+ activation_name):
"""Generate subgraph of one direction of unrolled RNN layer
Args:
@@ -395,7 +396,7 @@ def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activa
def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
- clip, direction, hidden_size, layout):
+ clip, direction, hidden_size, layout):
"""Generate Simple (forward or reverse) unrolled RNN
Args:
@@ -432,7 +433,7 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a
else:
initial_h = None
state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip,
- activation)
+ activation)
y_direction_dim = layout + 1
y_h_direction_dim = layout
state_layout_tensors = []
@@ -447,12 +448,11 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a
transformer.make_node(
'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations,
- clip, hidden_size, layout):
+ clip, hidden_size, layout):
"""Generate Bidirectional unrolled RNN
Args:
@@ -503,10 +503,10 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac
initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])
state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0],
- initial_h[0], clip, activations[0])
+ initial_h[0], clip, activations[0])
x.reverse()
state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1],
- initial_h[1], clip, activations[1])
+ initial_h[1], clip, activations[1])
state_b_tensors.reverse()
y_direction_dim = layout + 1
@@ -538,8 +538,7 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac
axis=y_h_direction_dim)
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
def _legalize_RNN(transformer, tensor_infos, node):
@@ -600,10 +599,10 @@ def _legalize_RNN(transformer, tensor_infos, node):
if direction in ['forward', 'reverse']:
_transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
- clip, direction, hidden_size, layout)
+ clip, direction, hidden_size, layout)
elif direction == 'bidirectional':
- _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, clip,
- hidden_size, layout)
+ _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations,
+ clip, hidden_size, layout)
else:
raise RuntimeError('Unknown RNN type')
@@ -611,7 +610,7 @@ def _legalize_RNN(transformer, tensor_infos, node):
def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip,
- act, dtype, hidden_size, batch_size):
+ act, dtype, hidden_size, batch_size):
"""Generate subgraph for one direction of unrolled LSTM layer
Args:
@@ -754,7 +753,7 @@ def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c,
def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
- activations, clip, direction, hidden_size, layout):
+ activations, clip, direction, hidden_size, layout):
"""Generate Simple (forward or reverse) unrolled LSTM
Args:
@@ -818,17 +817,15 @@ def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
transformer.make_node(
'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
Y_c = outputs[2]
- transformer.make_node(
- 'Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
+ transformer.make_node('Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
if direction == 'reverse':
state_layout_tensors.reverse()
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
-def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations,
- clip, hidden_size, layout):
+def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos,
+ activations, clip, hidden_size, layout):
"""Generate Bidirectional unrolled LSTM
Args:
@@ -929,12 +926,10 @@ def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, a
Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim])
Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim])
Y_c = outputs[2]
- transformer.make_node(
- 'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)
+ transformer.make_node('Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
def _legalize_LSTM(transformer, tensor_infos, node):
@@ -1001,10 +996,10 @@ def _legalize_LSTM(transformer, tensor_infos, node):
if direction in ['forward', 'reverse']:
_transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations,
- clip, direction, hidden_size, layout)
+ clip, direction, hidden_size, layout)
elif direction == 'bidirectional':
_transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations,
- clip, hidden_size, layout)
+ clip, hidden_size, layout)
else:
raise RuntimeError('Unknown LSTM type')
@@ -1052,10 +1047,12 @@ def legalize(model, options):
if __name__ == '__main__':
if len(sys.argv) < 3:
- print('usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
- '\n'
- ' In stand-alone utility mode this tool provides basic funtionality\n'
- ' If you want to have more control over applied transformations, use this legalizer as a library')
+ print(
+ 'usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
+ '\n'
+        '    In stand-alone utility mode this tool provides basic functionality\n'
+ ' If you want to have more control over applied transformations, use this legalizer as a library'
+ )
exit(1)
options = LegalizeOptions()
options.unroll_lstm = True
diff --git a/compiler/one-cmds/requires.cmake b/compiler/one-cmds/requires.cmake
index b1aabdb97..c27920980 100644
--- a/compiler/one-cmds/requires.cmake
+++ b/compiler/one-cmds/requires.cmake
@@ -1,6 +1,7 @@
require("tf2tfliteV2")
require("tflite2circle")
require("circle2circle")
+require("circle-eval-diff")
require("circle-quantizer")
require("record-minmax")
require("vconone")
diff --git a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt
index caea756c2..17f55ec96 100644
--- a/compiler/one-cmds/tests/CMakeLists.txt
+++ b/compiler/one-cmds/tests/CMakeLists.txt
@@ -4,6 +4,8 @@
file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg")
file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json")
+file(GLOB PYSCRIPTS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.py")
+file(GLOB WORKFLOWITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.workflow.json")
# Create a script to run the tests at installation folder
set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
@@ -45,6 +47,16 @@ foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS})
install(FILES ${QCONFIGITEM} DESTINATION test)
endforeach(QCONFIGITEM)
+foreach(PYSCRIPT IN ITEMS ${PYSCRIPTS})
+ get_filename_component(ITEM_PREFIX ${PYSCRIPT} NAME_WE)
+ install(FILES ${PYSCRIPT} DESTINATION test)
+endforeach(PYSCRIPT)
+
+foreach(WORKFLOWITEM IN ITEMS ${WORKFLOWITEMS})
+ get_filename_component(ITEM_PREFIX ${WORKFLOWITEM} NAME_WE)
+ install(FILES ${WORKFLOWITEM} DESTINATION test)
+endforeach(WORKFLOWITEM)
+
file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
file(APPEND "${DRIVER_SCRIPT}"
diff --git a/compiler/one-cmds/tests/OONECC_024.cfg b/compiler/one-cmds/tests/OONECC_024.cfg
new file mode 100644
index 000000000..a39aae071
--- /dev/null
+++ b/compiler/one-cmds/tests/OONECC_024.cfg
@@ -0,0 +1,2 @@
+[one-optimize]
+make_batchnorm_gamma_positive=True
diff --git a/compiler/one-cmds/tests/one-build_008.cfg b/compiler/one-cmds/tests/one-build_008.cfg
index 615047c86..8c777f64f 100644
--- a/compiler/one-cmds/tests/one-build_008.cfg
+++ b/compiler/one-cmds/tests/one-build_008.cfg
@@ -15,7 +15,6 @@ output_path=test_onnx_model.circle
[one-optimize]
input_path=test_onnx_model.circle
output_path=test_onnx_model.opt.circle
-all=True
remove_redundant_transpose=True
[one-codegen]
diff --git a/compiler/one-cmds/tests/one-build_009.cfg b/compiler/one-cmds/tests/one-build_009.cfg
index 66bca250d..b5a35dd97 100644
--- a/compiler/one-cmds/tests/one-build_009.cfg
+++ b/compiler/one-cmds/tests/one-build_009.cfg
@@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle
[one-optimize]
input_path=onnx_conv2d_conv2d.circle
output_path=onnx_conv2d_conv2d.opt.circle
-all=True
remove_redundant_transpose=True
convert_nchw_to_nhwc=True
diff --git a/compiler/one-cmds/tests/one-import-onnx_002.test b/compiler/one-cmds/tests/one-import-onnx_002.test
new file mode 100644
index 000000000..a6a38eee5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-onnx_002.test
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# test for experimental_disable_batchmatmul_unfold option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.onnx"
+outputfile="./reshape_matmul.circle"
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test without the option: BatchMatMul should be unfolded into FULLY_CONNECTED
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+
+if ! grep -q "FULLY_CONNECTED" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test with the option: BATCH_MATMUL should be kept (not unfolded)
+one-import-onnx \
+--experimental_disable_batchmatmul_unfold \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+
+if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
+exit 0
diff --git a/compiler/one-cmds/tests/one-infer-test-post-process.py b/compiler/one-cmds/tests/one-infer-test-post-process.py
new file mode 100644
index 000000000..0f0e0d701
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer-test-post-process.py
@@ -0,0 +1,16 @@
+# This script gets one argument and print it
+
+import sys
+from pathlib import Path
+
+
+def main():
+ if len(sys.argv) < 2:
+ filepath = Path(sys.argv[0])
+ sys.exit("Usage: " + filepath.name + " [Word to print]")
+ word = sys.argv[1]
+ print(word)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/compiler/one-cmds/tests/one-infer_001.test b/compiler/one-cmds/tests/one-infer_001.test
new file mode 100644
index 000000000..e7b569522
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_001.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/help-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# copy help-infer to bin folder
+cp help-infer ../bin/help-infer
+
+# run test
+one-infer -b help -- -h > ${filename}.log
+
+rm -rf ../bin/help-infer
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_002.test b/compiler/one-cmds/tests/one-infer_002.test
new file mode 100644
index 000000000..22070de19
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_002.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -d dummy-infer -- ${inputfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_003.test b/compiler/one-cmds/tests/one-infer_003.test
new file mode 100644
index 000000000..e2aa459a1
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_003.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -b dummy -- ${inputfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_004.test b/compiler/one-cmds/tests/one-infer_004.test
new file mode 100644
index 000000000..a4cb76c55
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_004.test
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-infer's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -h > ${filename}.log
+
+if grep -q "command line tool to infer model" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_005.cfg b/compiler/one-cmds/tests/one-infer_005.cfg
new file mode 100644
index 000000000..aca687801
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_005.cfg
@@ -0,0 +1,3 @@
+[one-infer]
+backend=dummy
+command=sample.tvn
diff --git a/compiler/one-cmds/tests/one-infer_005.test b/compiler/one-cmds/tests/one-infer_005.test
new file mode 100644
index 000000000..a44dd0e25
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_005.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with configuration input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-infer_005.cfg"
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_006.test b/compiler/one-cmds/tests/one-infer_006.test
new file mode 100644
index 000000000..2612133a3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_006.test
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -b dummy --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
+return_code=$?
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ if [ "$return_code" -eq "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_neg_001.test b/compiler/one-cmds/tests/one-infer_neg_001.test
new file mode 100644
index 000000000..62e721128
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_001.test
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "error: the following arguments are required: {-d/--driver | -b/--backend}" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_002.test b/compiler/one-cmds/tests/one-infer_neg_002.test
new file mode 100644
index 000000000..fa88876e8
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_002.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# passed driver is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+driver_name="neg-infer"
+
+trap_err_onexit()
+{
+  if grep -q "FileNotFoundError: ${driver_name} not found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -d ${driver_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_003.test b/compiler/one-cmds/tests/one-infer_neg_003.test
new file mode 100644
index 000000000..a0005520f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_003.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# passed backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+backend_name="neg"
+
+trap_err_onexit()
+{
+  if grep -q "FileNotFoundError: ${backend_name}-infer not found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -b ${backend_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_004.test b/compiler/one-cmds/tests/one-infer_neg_004.test
new file mode 100644
index 000000000..b9130d051
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_004.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# both -b and -d option drivers are given as argument
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+backend_name="neg"
+driver_name="neg2"
+
+trap_err_onexit()
+{
+  if grep -q "\-d and -b options are mutually exclusive. Please use only one of them" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -d ${driver_name} -b ${backend_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_005.test b/compiler/one-cmds/tests/one-infer_neg_005.test
new file mode 100644
index 000000000..9074debcf
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_005.test
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with invalid post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ return_code=$?
+  if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ # Case of succeed of inference driver but error after it
+ if [ "$return_code" -ne "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+ fi
+
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -b dummy --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-infer
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-optimize_001.test b/compiler/one-cmds/tests/one-optimize_001.test
index 8eb58f4eb..4152fa3dd 100644
--- a/compiler/one-cmds/tests/one-optimize_001.test
+++ b/compiler/one-cmds/tests/one-optimize_001.test
@@ -40,7 +40,7 @@ if [[ ! -s ${inputfile} ]]; then
fi
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path ${inputfile} \
--output_path ${outputfile} > /dev/null 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_002.test b/compiler/one-cmds/tests/one-optimize_002.test
index bd64494be..58f792bf8 100644
--- a/compiler/one-cmds/tests/one-optimize_002.test
+++ b/compiler/one-cmds/tests/one-optimize_002.test
@@ -40,7 +40,7 @@ if [[ ! -s ${inputfile} ]]; then
fi
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--change_outputs InceptionV3/Logits/SpatialSqueeze1 \
--input_path ${inputfile} \
--output_path ${outputfile} > /dev/null 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_neg_001.test b/compiler/one-cmds/tests/one-optimize_neg_001.test
index f0b5563c7..c67e3d489 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_001.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_001.test
@@ -39,7 +39,7 @@ rm -rf ${outputfile}
rm -rf ${outputfile}.log
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_neg_002.test b/compiler/one-cmds/tests/one-optimize_neg_002.test
index 72f306e20..a1ef70216 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_002.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_002.test
@@ -39,7 +39,7 @@ rm -rf ${outputfile}
rm -rf ${outputfile}.log
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_neg_003.test b/compiler/one-cmds/tests/one-optimize_neg_003.test
index 3fe7d330e..668a6c29d 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_003.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_003.test
@@ -44,7 +44,7 @@ if [[ ! -s ${inputfile} ]]; then
fi
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path "${inputfile}" > "${filename}.log" 2>&1
echo "${filename_ext} FAILED"
diff --git a/compiler/one-cmds/tests/one-optimize_neg_004.test b/compiler/one-cmds/tests/one-optimize_neg_004.test
index e73911b54..5abd4c553 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_004.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_004.test
@@ -39,7 +39,7 @@ rm -rf ${outputfile}
rm -rf ${filename}.log
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--change_outputs non_existing_node_name \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-partition_001.test b/compiler/one-cmds/tests/one-partition_001.test
new file mode 100644
index 000000000..a6fba07d7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_001.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="${testmodel}.circle"
+partfile="${testmodel}.part"
+outputfile="${testmodel}.conn.json"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+
+# run test
+one-partition \
+--input_file ${inputfile} \
+--part_file ${partfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-partition_neg_001.test b/compiler/one-cmds/tests/one-partition_neg_001.test
new file mode 100644
index 000000000..d54a94fa2
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_neg_001.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid .part file (wrong comply value)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ if grep -q "ERROR" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="${testmodel}.circle"
+partfile="${testmodel}.neg.part"
+outputfile="${testmodel}.conn.json"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+rm -rf ${filename}.log
+
+# run test
+one-partition \
+--input_file ${inputfile} \
+--part_file ${partfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-partition_neg_002.test b/compiler/one-cmds/tests/one-partition_neg_002.test
new file mode 100644
index 000000000..23fe84c05
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_neg_002.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid .cfg file (no one-partition section)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ if grep -q "'one-partition' section" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+cfgfile="${testmodel}.neg.cfg"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+rm -rf ${filename}.log
+
+# run test
+one-partition -C ${cfgfile}> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_010.test b/compiler/one-cmds/tests/one-quantize_010.test
new file mode 100644
index 000000000..1095ba0a0
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_010.test
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_010.q.circle"
+datafile="./inception_v3_test_data.h5"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+ /bin/bash one-import_001.test > /dev/null 2>&1
+ return_code=$?
+ if [[ ${return_code} != 0 ]]; then
+ trap_err_onexit
+ fi
+fi
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datafile} \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datafile} \
+--print_mpeir > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_011.test b/compiler/one-cmds/tests/one-quantize_011.test
new file mode 100644
index 000000000..34d7f57b5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_011.test
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_011.q.circle"
+datafile="./inception_v3_test_data.h5"
+
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datafile} \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datafile} \
+--print_top5_match > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_012.qconf.json b/compiler/one-cmds/tests/one-quantize_012.qconf.json
new file mode 100644
index 000000000..4a15b04f5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_012.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "InceptionV3/Predictions/Reshape_1"],
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_012.test b/compiler/one-cmds/tests/one-quantize_012.test
new file mode 100644
index 000000000..fba18acc5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_012.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_012.q.circle"
+
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--quant_config one-quantize_012.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_013.qconf.json b/compiler/one-cmds/tests/one-quantize_013.qconf.json
new file mode 100644
index 000000000..4a15b04f5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_013.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "InceptionV3/Predictions/Reshape_1"],
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_013.test b/compiler/one-cmds/tests/one-quantize_013.test
new file mode 100644
index 000000000..fd443d627
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_013.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# quantized_dtype and granularity are given by qconfig file
+# (not by command line interface)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_013.q.circle"
+
+rm -rf ${outputfile}
+
+# run test without input data
+# quantized_dtype and granularity are not given here
+one-quantize \
+--input_dtype float32 \
+--quant_config one-quantize_013.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_014.test b/compiler/one-cmds/tests/one-quantize_014.test
new file mode 100644
index 000000000..518c32841
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_014.test
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test if `circle-eval-diff` supports directory input.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_014.q.circle"
+datadir="./raw_files/"
+
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datadir} \
+--input_data_format dir \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datadir} \
+--print_top5_match > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_015.test b/compiler/one-cmds/tests/one-quantize_015.test
new file mode 100644
index 000000000..bb45b5722
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_015.test
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test if --fake_quantize option works well
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_015.fq.circle"
+
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--fake_quantize \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test
index ac920a4fe..e182edf78 100644
--- a/compiler/one-cmds/tests/one-quantize_neg_019.test
+++ b/compiler/one-cmds/tests/one-quantize_neg_019.test
@@ -42,7 +42,7 @@ one-quantize \
--input_dtype float32 \
--quantized_dtype int16 \
--granularity channel \
---input_type float32 \
+--input_type float64 \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-quantize_neg_020.test b/compiler/one-cmds/tests/one-quantize_neg_020.test
new file mode 100644
index 000000000..27b11c3e6
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_020.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# check error message is printed when qconfig file is not json
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to decode" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.neg_020.circle"
+
+rm -rf ${outputfile}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quant_config one-quantize_neg_020.test \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_008.cfg b/compiler/one-cmds/tests/onecc_008.cfg
index 0be026e6e..020e274e1 100644
--- a/compiler/one-cmds/tests/onecc_008.cfg
+++ b/compiler/one-cmds/tests/onecc_008.cfg
@@ -15,7 +15,6 @@ output_path=test_onnx_model.circle
[one-optimize]
input_path=test_onnx_model.circle
output_path=test_onnx_model.opt.circle
-all=True
remove_redundant_transpose=True
[one-codegen]
diff --git a/compiler/one-cmds/tests/onecc_009.cfg b/compiler/one-cmds/tests/onecc_009.cfg
index a17ae59cb..86121c557 100644
--- a/compiler/one-cmds/tests/onecc_009.cfg
+++ b/compiler/one-cmds/tests/onecc_009.cfg
@@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle
[one-optimize]
input_path=onnx_conv2d_conv2d.circle
output_path=onnx_conv2d_conv2d.opt.circle
-all=True
remove_redundant_transpose=True
convert_nchw_to_nhwc=True
diff --git a/compiler/one-cmds/tests/onecc_024.cfg b/compiler/one-cmds/tests/onecc_024.cfg
new file mode 100644
index 000000000..7b4b1a80a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_024.cfg
@@ -0,0 +1,22 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+make_batchnorm_gamma_positive=False
diff --git a/compiler/one-cmds/tests/onecc_024.test b/compiler/one-cmds/tests/onecc_024.test
new file mode 100644
index 000000000..1f5daa13e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_024.test
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use `OONECC_024` optimization option
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+clean_envir()
+{
+ rm -rf ../optimization/OONECC_024.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+}
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ clean_envir
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_024.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+cp OONECC_024.cfg ../optimization
+
+# run test
+LUCI_LOG=5 onecc -C ${configfile} -OONECC_024 > ${filename}.log 2>&1
+
+clean_envir
+
+if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/onecc_025.cfg b/compiler/one-cmds/tests/onecc_025.cfg
new file mode 100644
index 000000000..4776ea80e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_025.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_025.test b/compiler/one-cmds/tests/onecc_025.test
new file mode 100644
index 000000000..396f40cea
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_025.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize with the configuration file that includes `onecc` section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_001.cfg"
+outputfile="inception_v3.opt.circle"
+
+# run test
+onecc -C ${configfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_026.cfg b/compiler/one-cmds/tests/onecc_026.cfg
new file mode 100644
index 000000000..c27a13654
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_026.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_026.q.circle
+input_data=inception_v3_test_data.h5
+evaluate_result=True
+test_data=inception_v3_test_data.h5
+print_mpeir=True
diff --git a/compiler/one-cmds/tests/onecc_026.test b/compiler/one-cmds/tests/onecc_026.test
new file mode 100644
index 000000000..84cfa4146
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_026.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_026.cfg"
+outputfile="inception_v3.onecc_026.q.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_027.cfg b/compiler/one-cmds/tests/onecc_027.cfg
new file mode 100644
index 000000000..d3f6b5e82
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_027.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=False
+one-infer=True
+
+[one-infer]
+backend=dummy
+command=test_onnx_model.bin
diff --git a/compiler/one-cmds/tests/onecc_027.test b/compiler/one-cmds/tests/onecc_027.test
new file mode 100644
index 000000000..e727359ef
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_027.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_027.cfg"
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+onecc -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/onecc_028.test b/compiler/one-cmds/tests/onecc_028.test
new file mode 100644
index 000000000..10ce1583b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_028.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-optimize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_028.workflow.json"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_028.workflow.json b/compiler/one-cmds/tests/onecc_028.workflow.json
new file mode 100644
index 000000000..84bfd01fa
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_028.workflow.json
@@ -0,0 +1,37 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE",
+ "PACK"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "PACK": {
+ "one-cmd": "one-pack",
+ "commands": {
+ "input_path": "inception_v3.opt.circle",
+ "output_path": "inception_v3_pkg"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_029.test b/compiler/one-cmds/tests/onecc_029.test
new file mode 100644
index 000000000..9bab1a1ee
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_029.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_029.workflow.json"
+outputfile="inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_029.workflow.json b/compiler/one-cmds/tests/onecc_029.workflow.json
new file mode 100644
index 000000000..65c9ea662
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_029.workflow.json
@@ -0,0 +1,30 @@
+{
+ "workflows": [
+ "QUANTIZE_WORKFLOW"
+ ],
+ "QUANTIZE_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF",
+ "QUANTIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_030.test b/compiler/one-cmds/tests/onecc_030.test
new file mode 100644
index 000000000..c0aa56a51
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_030.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_030.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_030.workflow.json b/compiler/one-cmds/tests/onecc_030.workflow.json
new file mode 100644
index 000000000..111a1b034
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_030.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "codegen_wf"
+ ],
+ "codegen_wf": {
+ "steps": [
+ "import_tf",
+ "codegen"
+ ],
+ "import_tf": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_031.test b/compiler/one-cmds/tests/onecc_031.test
new file mode 100644
index 000000000..7a1c670c8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_031.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_031.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_031.workflow.json b/compiler/one-cmds/tests/onecc_031.workflow.json
new file mode 100644
index 000000000..83d52b942
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_031.workflow.json
@@ -0,0 +1,33 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_032.test b/compiler/one-cmds/tests/onecc_032.test
new file mode 100644
index 000000000..89b6c41a5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_032.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_032.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_032.workflow.json b/compiler/one-cmds/tests/onecc_032.workflow.json
new file mode 100644
index 000000000..08d3f0f5c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_032.workflow.json
@@ -0,0 +1,42 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "quantize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "quantize": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.quantized.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_033.test b/compiler/one-cmds/tests/onecc_033.test
new file mode 100644
index 000000000..635582f61
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_033.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_033.workflow.json"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_033.workflow.json b/compiler/one-cmds/tests/onecc_033.workflow.json
new file mode 100644
index 000000000..01233ffd9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_033.workflow.json
@@ -0,0 +1,42 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "quantize",
+ "pack"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "quantize": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ },
+ "pack": {
+ "one-cmd": "one-pack",
+ "commands": {
+ "input_path": "inception_v3.quantized.circle",
+ "output_path": "inception_v3_pkg"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_034.test b/compiler/one-cmds/tests/onecc_034.test
new file mode 100644
index 000000000..e76654809
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_034.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_034.workflow.json"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_034.workflow.json b/compiler/one-cmds/tests/onecc_034.workflow.json
new file mode 100644
index 000000000..bc3cbbf58
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_034.workflow.json
@@ -0,0 +1,35 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-onnx",
+ "commands": {
+ "input_path": "onnx_conv2d_conv2d.onnx",
+ "output_path": "onnx_conv2d_conv2d.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "onnx_conv2d_conv2d.circle",
+ "output_path": "onnx_conv2d_conv2d.opt.circle",
+ "remove_redundant_transpose": "True",
+ "convert_nchw_to_nhwc": "True"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_035.test b/compiler/one-cmds/tests/onecc_035.test
new file mode 100644
index 000000000..762cdd31a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_035.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf generates intermediate files
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_035.workflow.json"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_035.workflow.json b/compiler/one-cmds/tests/onecc_035.workflow.json
new file mode 100644
index 000000000..6abf1f32b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_035.workflow.json
@@ -0,0 +1,22 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import"
+ ],
+ "import": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.alt.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v1",
+ "save_intermediate": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_036.test b/compiler/one-cmds/tests/onecc_036.test
new file mode 100644
index 000000000..865255e9f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_036.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-onnx generates intermediate files
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_036.workflow.json"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_036.workflow.json b/compiler/one-cmds/tests/onecc_036.workflow.json
new file mode 100644
index 000000000..5fa29edb5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_036.workflow.json
@@ -0,0 +1,18 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import"
+ ],
+ "import": {
+ "one-cmd": "one-import-onnx",
+ "commands": {
+ "input_path": "test_onnx_model.onnx",
+ "output_path": "test_onnx_model.circle",
+ "save_intermediate": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_037.test b/compiler/one-cmds/tests/onecc_037.test
new file mode 100644
index 000000000..52ea9e4c7
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_037.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-optimize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_037.workflow.json"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_037.workflow.json b/compiler/one-cmds/tests/onecc_037.workflow.json
new file mode 100644
index 000000000..3317fb27a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_037.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "IMPORT",
+ "OPTIMIZE"
+ ],
+ "IMPORT": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_038.test b/compiler/one-cmds/tests/onecc_038.test
new file mode 100644
index 000000000..6b8f7cf64
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_038.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_038.workflow.json"
+outputfile="inception_v3.list.quantized.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_038.workflow.json b/compiler/one-cmds/tests/onecc_038.workflow.json
new file mode 100644
index 000000000..5ac515d00
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_038.workflow.json
@@ -0,0 +1,31 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "IMPORT",
+ "QUANTIZE"
+ ],
+ "IMPORT": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.list.quantized.circle",
+ "input_data": "datalist.txt",
+ "input_data_format": "list"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_039.test b/compiler/one-cmds/tests/onecc_039.test
new file mode 100644
index 000000000..7db9d901c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_039.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-quantize quantizes the model and evaluates the result
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+  if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_039.workflow.json"
+outputfile="inception_v3.onecc_039.q.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_039.workflow.json b/compiler/one-cmds/tests/onecc_039.workflow.json
new file mode 100644
index 000000000..55ef56988
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_039.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_039.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_040.cfg b/compiler/one-cmds/tests/onecc_040.cfg
new file mode 100644
index 000000000..4776ea80e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_040.test b/compiler/one-cmds/tests/onecc_040.test
new file mode 100644
index 000000000..2f7567730
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow with cfg reference
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_040.workflow.json"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_040.workflow.json b/compiler/one-cmds/tests/onecc_040.workflow.json
new file mode 100644
index 000000000..2d4119b21
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.workflow.json
@@ -0,0 +1,10 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "cfg-reference": {
+ "path": "onecc_040.cfg"
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_041.cfg b/compiler/one-cmds/tests/onecc_041.cfg
new file mode 100644
index 000000000..16135f074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_without_opt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_041.test b/compiler/one-cmds/tests/onecc_041.test
new file mode 100644
index 000000000..791dd12ca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.test
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run workflows
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+  if grep -q "Do inference of inception_v3_without_opt\.circle" "${filename}.log" &&
+    grep -q "Do inference of inception_v3\.opt\.circle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_041.workflow.json"
+outputfile1="inception_v3_without_opt.circle"
+outputfile2="inception_v3.opt.circle"
+
+cp dummy-inferV2 ../bin/dummy-inferV2
+
+rm -rf ${outputfile1} ${outputfile2}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-inferV2
+
+if [[ ! -s "${outputfile1}" ]] || [[ ! -s "${outputfile2}" ]]; then
+ trap_err_onexit
+fi
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_041.workflow.json b/compiler/one-cmds/tests/onecc_041.workflow.json
new file mode 100644
index 000000000..7dfc1c664
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.workflow.json
@@ -0,0 +1,61 @@
+{
+ "workflows": [
+ "WITHOUT_OPT",
+ "WITH_OPT",
+ "INFER"
+ ],
+ "INFER": {
+ "run-after": [
+ "WITHOUT_OPT",
+ "WITH_OPT"
+ ],
+ "steps": [
+ "INFER1",
+ "INFER2"
+ ],
+ "INFER1": {
+ "one-cmd": "one-infer",
+ "commands" : {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3_without_opt.circle"
+ }
+ },
+ "INFER2": {
+ "one-cmd": "one-infer",
+ "commands": {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3.opt.circle"
+ }
+ }
+ },
+ "WITHOUT_OPT": {
+ "cfg-reference": {
+ "path": "onecc_041.cfg"
+ }
+ },
+ "WITH_OPT": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_009.test b/compiler/one-cmds/tests/onecc_neg_009.test
new file mode 100644
index 000000000..54dd129e4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_009.test
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Valid optimization option but invalid configuration file path
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ rm -rf ../optimization/OONECC_NEG_009.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+  if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+
+touch ../optimization/OONECC_NEG_009.cfg
+
+configfile=".."
+
+# run test
+onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_010.test b/compiler/one-cmds/tests/onecc_neg_010.test
new file mode 100644
index 000000000..ddad5e6de
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_010.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Invalid optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid optimization option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+# run test
+onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_011.cfg b/compiler/one-cmds/tests/onecc_neg_011.cfg
new file mode 100644
index 000000000..b5873245b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_011.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+wrong_opt=True
diff --git a/compiler/one-cmds/tests/onecc_neg_011.test b/compiler/one-cmds/tests/onecc_neg_011.test
new file mode 100644
index 000000000..3f043a77e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_011.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# generate error for unrecognized optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "following arguments are unrecognized" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_011.cfg"
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_012.cfg b/compiler/one-cmds/tests/onecc_neg_012.cfg
new file mode 100644
index 000000000..fdc73ef43
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_012.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=False
+one-infer=True
+
+[one-infer]
+driver=dummy-infer
+backend=dummy
+command="dummy arguments"
diff --git a/compiler/one-cmds/tests/onecc_neg_012.test b/compiler/one-cmds/tests/onecc_neg_012.test
new file mode 100644
index 000000000..9feca5f54
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_012.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Check driver and backend option is mutually exclusive
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "\-d and -b options are mutually exclusive" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_012.cfg"
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_013.test b/compiler/one-cmds/tests/onecc_neg_013.test
new file mode 100644
index 000000000..0dd8a0fdd
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_013.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing workflow file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found given workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_013.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_014.test b/compiler/one-cmds/tests/onecc_neg_014.test
new file mode 100644
index 000000000..2ed5dcbf5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_014.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid workflow file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_014.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_014.workflow.json b/compiler/one-cmds/tests/onecc_neg_014.workflow.json
new file mode 100644
index 000000000..8d4fd431e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_014.workflow.json
@@ -0,0 +1,3 @@
+{
+ INVALID JSON FILE
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_015.test b/compiler/one-cmds/tests/onecc_neg_015.test
new file mode 100644
index 000000000..079ba677a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_015.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found" "${filename}.log" &&
+ grep -q "key in workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_015.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_015.workflow.json b/compiler/one-cmds/tests/onecc_neg_015.workflow.json
new file mode 100644
index 000000000..4cb752e4e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_015.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflowsssssss": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_016.test b/compiler/one-cmds/tests/onecc_neg_016.test
new file mode 100644
index 000000000..c52763f47
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_016.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found" "${filename}.log" &&
+ grep -q "key listed in" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_016.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_016.workflow.json b/compiler/one-cmds/tests/onecc_neg_016.workflow.json
new file mode 100644
index 000000000..c929cf38c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_016.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOWWWWW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_017.test b/compiler/one-cmds/tests/onecc_neg_017.test
new file mode 100644
index 000000000..2f173d2f6
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_017.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each workflow should have either" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_017.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_017.workflow.json b/compiler/one-cmds/tests/onecc_neg_017.workflow.json
new file mode 100644
index 000000000..22f1415e9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_017.workflow.json
@@ -0,0 +1,18 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_018.test b/compiler/one-cmds/tests/onecc_neg_018.test
new file mode 100644
index 000000000..bc2297ed0
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_018.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "are exclusive key" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_018.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_018.workflow.json b/compiler/one-cmds/tests/onecc_neg_018.workflow.json
new file mode 100644
index 000000000..58cb88e17
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_018.workflow.json
@@ -0,0 +1,24 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "cfg-reference": {
+ "path": "/path/to/ini/format/file"
+ },
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_019.test b/compiler/one-cmds/tests/onecc_neg_019.test
new file mode 100644
index 000000000..11ef3a9ee
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_019.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each step should have" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_019.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_019.workflow.json b/compiler/one-cmds/tests/onecc_neg_019.workflow.json
new file mode 100644
index 000000000..aedeeecca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_019.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmddddddddd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_020.test b/compiler/one-cmds/tests/onecc_neg_020.test
new file mode 100644
index 000000000..7f5073d82
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_020.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each step should have" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_020.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_020.workflow.json b/compiler/one-cmds/tests/onecc_neg_020.workflow.json
new file mode 100644
index 000000000..d3446d38f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_020.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commandssssssssss": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_021.test b/compiler/one-cmds/tests/onecc_neg_021.test
new file mode 100644
index 000000000..e9d4baaee
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_021.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have a cycle
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Workflows should not have a cycle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_021.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_021.workflow.json b/compiler/one-cmds/tests/onecc_neg_021.workflow.json
new file mode 100644
index 000000000..6d21111af
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_021.workflow.json
@@ -0,0 +1,44 @@
+{
+ "workflows": [
+ "CYCLE_WF1",
+ "CYCLE_WF2"
+ ],
+ "CYCLE_WF1": {
+ "run-after": [
+ "CYCLE_WF2"
+ ],
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ },
+ "CYCLE_WF2": {
+ "run-after": [
+ "CYCLE_WF1"
+ ],
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_022.cfg b/compiler/one-cmds/tests/onecc_neg_022.cfg
new file mode 100644
index 000000000..16135f074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_without_opt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_neg_022.test b/compiler/one-cmds/tests/onecc_neg_022.test
new file mode 100644
index 000000000..540071729
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have a cycle
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Workflows should not have a cycle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_022.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_022.workflow.json b/compiler/one-cmds/tests/onecc_neg_022.workflow.json
new file mode 100644
index 000000000..2e056acf1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.workflow.json
@@ -0,0 +1,63 @@
+{
+ "workflows": [
+ "WITHOUT_OPT",
+ "WITH_OPT",
+ "INFER"
+ ],
+ "INFER": {
+ "run-after": [
+ "WITHOUT_OPT",
+ "WITH_OPT"
+ ],
+ "steps": [
+ "INFER1",
+ "INFER2"
+ ],
+ "INFER1": {
+ "one-cmd": "one-infer",
+ "commands" : {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3_without_opt.circle"
+ }
+ },
+ "INFER2": {
+ "one-cmd": "one-infer",
+ "commands": {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3.opt.circle"
+ }
+ }
+ },
+ "WITHOUT_OPT": {
+ "cfg-reference": {
+ "path": "onecc_041.cfg"
+ }
+ },
+ "WITH_OPT": {
+ "run-after": [
+ "WITHOUT_OPT"
+ ],
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_023.test b/compiler/one-cmds/tests/onecc_neg_023.test
new file mode 100644
index 000000000..09717e8ad
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_023.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have wrong optimize option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Change outputs failed" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_023.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_023.workflow.json b/compiler/one-cmds/tests/onecc_neg_023.workflow.json
new file mode 100644
index 000000000..056e704fd
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_023.workflow.json
@@ -0,0 +1,30 @@
+{
+ "workflows": [
+ "WITH_OPT"
+ ],
+ "WITH_OPT": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle",
+ "change_outputs": "non_existing_node_name"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh
index c80c59834..c171cfe01 100644
--- a/compiler/one-cmds/tests/prepare_test_materials.sh
+++ b/compiler/one-cmds/tests/prepare_test_materials.sh
@@ -91,6 +91,20 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
# https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
fi
+if [[ ! -s "reshape_matmul.onnx" ]]; then
+ rm -rf reshape_matmul.zip
+ wget https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip
+ unzip reshape_matmul.zip
+ # https://github.com/Samsung/ONE/issues/9405#issuecomment-1180198137
+fi
+
+if [[ ! -s "Net_InstanceNorm_003.part" ]]; then
+ rm -rf Net_InstanceNorm_003.zip
+ wget https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
+ unzip Net_InstanceNorm_003.zip
+ # https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257
+fi
+
function files_missing() {
condition="test "
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
index be0322aca..d204447fd 100644
--- a/compiler/one-cmds/utils.py
+++ b/compiler/one-cmds/utils.py
@@ -47,6 +47,25 @@ def _add_default_arg(parser):
parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
+def _add_default_arg_no_CS(parser):
+ """
+ This adds -v -V args only (no -C nor -S)
+ """
+ # version
+ parser.add_argument(
+ '-v',
+ '--version',
+ action='store_true',
+ help='show program\'s version number and exit')
+
+ # verbose
+ parser.add_argument(
+ '-V',
+ '--verbose',
+ action='store_true',
+ help='output additional information to stdout or stderr')
+
+
def is_accumulated_arg(arg, driver):
if driver == "one-quantize":
accumulables = [
@@ -62,6 +81,43 @@ def _is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
+class Command:
+ def __init__(self, driver, args, log_file):
+ self.cmd = [driver]
+ self.driver = driver
+ self.args = args
+ self.log_file = log_file
+
+ # Add option if attrs are valid
+ # Option values are collected from self.args
+ def add_option_with_valid_args(self, option, attrs):
+ for attr in attrs:
+ if not _is_valid_attr(self.args, attr):
+ return self
+ self.cmd.append(option)
+ for attr in attrs:
+ self.cmd.append(getattr(self.args, attr))
+ return self
+
+ # Add option and values without any condition
+ def add_option_with_values(self, option, values):
+ self.cmd.append(option)
+ for value in values:
+ self.cmd.append(value)
+ return self
+
+ # Add option with no argument (ex: --verbose) if attr is valid
+ def add_noarg_option_if_valid_arg(self, option, attr):
+ if _is_valid_attr(self.args, attr):
+ self.cmd.append(option)
+ return self
+
+ # Run cmd and save logs
+ def run(self):
+ self.log_file.write((' '.join(self.cmd) + '\n').encode())
+ _run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
+
+
def _parse_cfg_and_overwrite(config_path, section, args):
"""
parse given section of configuration file and set the values of args.
@@ -153,8 +209,7 @@ def _run(cmd, err_prefix=None, logfile=None):
err_prefix: prefix to be put before every stderr lines
logfile: file stream to which both of stdout and stderr lines will be written
"""
- with subprocess.Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p:
+ with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
import select
inputs = set([p.stdout, p.stderr])
while inputs:
diff --git a/compiler/onnx-tools/CMakeLists.txt b/compiler/onnx-tools/CMakeLists.txt
index ac4500e0e..5935cdfbe 100644
--- a/compiler/onnx-tools/CMakeLists.txt
+++ b/compiler/onnx-tools/CMakeLists.txt
@@ -18,4 +18,10 @@ foreach(ONNX_TOOL IN ITEMS ${ONNX_TOOL_FILES})
add_custom_target(${ONNX_TOOL_TARGET} ALL DEPENDS ${ONNX_TOOL_BIN})
+ install(FILES ${ONNX_TOOL_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
endforeach(ONNX_TOOL)
diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt
index 51fd9a391..96dfc8687 100644
--- a/compiler/pota-quantization-value-test/CMakeLists.txt
+++ b/compiler/pota-quantization-value-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(QUANTIZATION_VALUE_TEST)
unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
unset(QUANTIZATION_CONFIG_VALUE_TEST)
diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt
index 31b906142..636361405 100644
--- a/compiler/record-minmax-conversion-test/CMakeLists.txt
+++ b/compiler/record-minmax-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(RECORD_MINMAX_CONVERSION_TEST)
macro(addTest NAME)
diff --git a/compiler/record-minmax/driver/Driver.cpp b/compiler/record-minmax/driver/Driver.cpp
index c9f1d0ca7..faa402f01 100644
--- a/compiler/record-minmax/driver/Driver.cpp
+++ b/compiler/record-minmax/driver/Driver.cpp
@@ -34,62 +34,33 @@ int entry(const int argc, char **argv)
arser::Arser arser(
"Embedding min/max values of activations to the circle model for post-training quantization");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
- arser.add_argument("--input_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input model filepath");
+ arser.add_argument("--input_model").required(true).help("Input model filepath");
arser.add_argument("--input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Input data filepath. If not given, record-minmax will run with randomly generated data. "
"Note that the random dataset does not represent inference workload, leading to poor "
"model accuracy.");
- arser.add_argument("--output_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Output model filepath");
+ arser.add_argument("--output_model").required(true).help("Output model filepath");
arser.add_argument("--min_percentile")
- .nargs(1)
.type(arser::DataType::FLOAT)
.help("Record n'th percentile of min");
arser.add_argument("--max_percentile")
- .nargs(1)
.type(arser::DataType::FLOAT)
.help("Record n'th percentile of max");
- arser.add_argument("--mode")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Record mode. percentile (default) or moving_average");
+ arser.add_argument("--mode").help("Record mode. percentile (default) or moving_average");
arser.add_argument("--input_data_format")
- .nargs(1)
- .type(arser::DataType::STR)
.help("Input data format. h5/hdf5 (default) or list/filelist");
arser.add_argument("--generate_profile_data")
.nargs(0)
- .required(false)
.default_value(false)
.help("This will turn on profiling data generation.");
diff --git a/compiler/record-minmax/include/RecordFunction.h b/compiler/record-minmax/include/RecordFunction.h
index ba199d071..5b993e4b3 100644
--- a/compiler/record-minmax/include/RecordFunction.h
+++ b/compiler/record-minmax/include/RecordFunction.h
@@ -18,7 +18,7 @@
#include <cassert>
#include <algorithm>
#include <cmath>
-#include <numeric>
+#include <limits>
#include <stdexcept>
namespace record_minmax
diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp
index 8288d3e5e..e6edbdca9 100644
--- a/compiler/record-minmax/src/MinMaxObserver.cpp
+++ b/compiler/record-minmax/src/MinMaxObserver.cpp
@@ -18,6 +18,7 @@
#include <luci/IR/CircleOpcode.h>
+#include <limits>
#include <math.h>
using DataType = luci_interpreter::DataType;
@@ -75,7 +76,7 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
// Reshape changes only shape of input tensor, efficiently is it a no-op.
return;
default:
- throw std::runtime_error("Tensor's data type is not float");
+ throw std::runtime_error("Tensor's data type is not float. " + node->name());
}
}
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp
index 10a14516f..6dbf98dc6 100644
--- a/compiler/record-minmax/src/RecordMinMax.cpp
+++ b/compiler/record-minmax/src/RecordMinMax.cpp
@@ -186,7 +186,13 @@ void RecordMinMax::initialize(const std::string &input_model_path)
throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
}
- _module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ throw std::runtime_error("Failed to load '" + input_model_path + "'");
+ }
+
+ _module = luci::Importer().importModule(circle_model);
if (_module == nullptr)
{
diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt
index f57102f1f..8dcf4c2b8 100644
--- a/compiler/souschef/CMakeLists.txt
+++ b/compiler/souschef/CMakeLists.txt
@@ -1,13 +1,20 @@
nnas_find_package(Protobuf QUIET)
+nnas_find_package(Fp16Source QUIET)
if(NOT Protobuf_FOUND)
message(STATUS "Build souschef: FAILED (missing Protobuf)")
return()
endif(NOT Protobuf_FOUND)
+if(NOT Fp16Source_FOUND)
+ message(STATUS "Build souschef: FAILED (missing Fp16Source)")
+ return()
+endif(NOT Fp16Source_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(souschef STATIC ${SOURCES})
set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(souschef PRIVATE ${Fp16Source_DIR}/include)
target_include_directories(souschef PUBLIC include)
target_link_libraries(souschef PUBLIC libprotobuf)
diff --git a/compiler/souschef/include/souschef/Data/Explicit.h b/compiler/souschef/include/souschef/Data/Explicit.h
index 7cbb773da..434d0ec2c 100644
--- a/compiler/souschef/include/souschef/Data/Explicit.h
+++ b/compiler/souschef/include/souschef/Data/Explicit.h
@@ -96,6 +96,41 @@ template <typename T> struct ExplicitDataChefFactory : public DataChefFactory
}
};
+class ExplicitFloat16DataChef final : public DataChef
+{
+public:
+ ExplicitFloat16DataChef()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+public:
+ void insert(const float &value) { _values.emplace_back(value); }
+
+private:
+ // NOTE store values in float but will convert to uint16_t in generate()
+ std::vector<float> _values;
+};
+
+struct ExplicitFloat16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const
+ {
+ std::unique_ptr<ExplicitFloat16DataChef> res{new ExplicitFloat16DataChef};
+
+ for (uint32_t n = 0; n < args.count(); ++n)
+ {
+ auto const value = to_number<float>(args.value(n));
+ res->insert(value);
+ }
+
+ return std::move(res);
+ }
+};
+
} // namespace souschef
#endif // __SOUSCHEF_DATA_EXPLICIT_H__
diff --git a/compiler/souschef/include/souschef/Data/Gaussian.h b/compiler/souschef/include/souschef/Data/Gaussian.h
index 8093b4c41..c9ac571f9 100644
--- a/compiler/souschef/include/souschef/Data/Gaussian.h
+++ b/compiler/souschef/include/souschef/Data/Gaussian.h
@@ -41,6 +41,22 @@ private:
float _stddev;
};
+class GaussianFloat16DataChef final : public DataChef
+{
+public:
+ GaussianFloat16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
class GaussianInt32DataChef final : public DataChef
{
public:
@@ -109,6 +125,11 @@ struct GaussianUint8DataChefFactory : public DataChefFactory
std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+struct GaussianFloat16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
} // namespace souschef
#endif // __SOUSCHEF_DATA_GAUSSIAN_H__
diff --git a/compiler/souschef/src/Explicit.cpp b/compiler/souschef/src/Explicit.cpp
index eb36cb7c3..3278ae3c3 100644
--- a/compiler/souschef/src/Explicit.cpp
+++ b/compiler/souschef/src/Explicit.cpp
@@ -19,6 +19,8 @@
#include <string>
#include <vector>
+#include <fp16.h>
+
namespace souschef
{
@@ -74,4 +76,23 @@ void ExplicitDataChef<std::string>::write_value(std::vector<uint8_t> &res, int32
}
}
+std::vector<uint8_t> ExplicitFloat16DataChef::generate(int32_t count) const
+{
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ float const fvalue = (n < _values.size()) ? _values.at(n) : 0.0;
+ uint16_t const value = fp16_ieee_from_fp32_value(fvalue);
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
} // namespace souschef
diff --git a/compiler/souschef/src/Gaussian.cpp b/compiler/souschef/src/Gaussian.cpp
index 32cbcff4d..53a62cabf 100644
--- a/compiler/souschef/src/Gaussian.cpp
+++ b/compiler/souschef/src/Gaussian.cpp
@@ -23,6 +23,8 @@
#include <cassert>
#include <stdexcept>
+#include <fp16.h>
+
namespace souschef
{
@@ -36,7 +38,7 @@ static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float s
std::vector<uint8_t> res;
constexpr float max_cap = std::numeric_limits<T>::max();
- constexpr float min_cap = std::numeric_limits<T>::min();
+ constexpr float min_cap = std::numeric_limits<T>::lowest();
for (uint32_t n = 0; n < count; ++n)
{
float raw_value = dist(rand);
@@ -69,6 +71,34 @@ std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
return generate_gaussian<float>(count, _mean, _stddev);
}
+std::vector<uint8_t> GaussianFloat16DataChef::generate(int32_t count) const
+{
+ auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count();
+ auto seed = static_cast<std::minstd_rand::result_type>(time_stamp);
+
+ std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
+ std::normal_distribution<float> dist{_mean, _stddev};
+
+ std::vector<uint8_t> res;
+
+ constexpr float max_cap = 1e9;
+ constexpr float min_cap = -1e9;
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ float raw_value = dist(rand);
+ const float capped_value = std::max(min_cap, std::min(max_cap, raw_value));
+ const uint16_t value = fp16_ieee_from_fp32_value(capped_value);
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
{
return generate_gaussian<int32_t>(count, _mean, _stddev);
@@ -136,4 +166,17 @@ std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &
return std::unique_ptr<DataChef>{new GaussianUint8DataChef{mean, stddev}};
}
+std::unique_ptr<DataChef> GaussianFloat16DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianFloat16DataChef{mean, stddev}};
+}
+
} // namespace souschef
diff --git a/compiler/tf2circle-conversion-test/CMakeLists.txt b/compiler/tf2circle-conversion-test/CMakeLists.txt
index 27f2463f3..79a39873b 100644
--- a/compiler/tf2circle-conversion-test/CMakeLists.txt
+++ b/compiler/tf2circle-conversion-test/CMakeLists.txt
@@ -128,6 +128,10 @@ list(APPEND TEST_DEPS "${TEST_CONFIG}")
# This "tf2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase
add_custom_target(tf2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
# Run tests
add_test(
NAME tf2circle_conversion_test
diff --git a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
index 48b098e24..83596fade 100644
--- a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
+++ b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
@@ -132,6 +132,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}")
# Generate dependencies
add_custom_target(tf2circle_dredd_pb_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_test(
NAME tf2circle_dredd_pb_test
COMMAND
diff --git a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
index 789e58535..427e57502 100644
--- a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
@@ -175,6 +175,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}")
# Generate dependencies
add_custom_target(tf2circle_dredd_pbtxt_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_test(
NAME tf2circle_dredd_pbtxt_test
COMMAND
diff --git a/compiler/tf2circle-model-test/CMakeLists.txt b/compiler/tf2circle-model-test/CMakeLists.txt
index 2fb82236a..ad776a62b 100644
--- a/compiler/tf2circle-model-test/CMakeLists.txt
+++ b/compiler/tf2circle-model-test/CMakeLists.txt
@@ -100,6 +100,10 @@ list(APPEND DEPS "${TEST_RUNNER_SCRIPT}")
### Generate dependencies
add_custom_target(tf2circle_model_test_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
# NOTE This target is not built by default
add_test(
NAME tf2circle_model_test
diff --git a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
index b75c50772..ac9f14d70 100644
--- a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
+++ b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
index 87cf7836f..95a296ef8 100644
--- a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-value-pb-test/CMakeLists.txt b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
index 41974f72c..a6c451e0b 100644
--- a/compiler/tf2tflite-value-pb-test/CMakeLists.txt
+++ b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
index 2e76e21d3..fde3e60b4 100644
--- a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
index 0b4739374..97aa07fd3 100644
--- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
+++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nncc_find_resource(TensorFlowTests)
#
diff --git a/compiler/tf2tfliteV2/tf2tfliteV2.py b/compiler/tf2tfliteV2/tf2tfliteV2.py
index 6b578ad53..2bcf55328 100755
--- a/compiler/tf2tfliteV2/tf2tfliteV2.py
+++ b/compiler/tf2tfliteV2/tf2tfliteV2.py
@@ -110,6 +110,12 @@ def _get_parser():
type=str,
help="Names of the output arrays, comma-separated.")
+ # experimental options
+ parser.add_argument(
+ "--experimental_disable_batchmatmul_unfold",
+ action="store_true",
+ help="Experimental disable BatchMatMul unfold")
+
# Set default value
parser.set_defaults(model_format="graph_def")
return parser
@@ -228,6 +234,9 @@ def _v2_convert(flags):
keras_model = tf.keras.models.load_model(flags.input_path)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ if flags.experimental_disable_batchmatmul_unfold:
+ converter._experimental_disable_batchmatmul_unfold = True
+
converter.allow_custom_ops = True
converter.experimental_new_converter = True
diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt
index 9e1cb720f..2c6e3a147 100644
--- a/compiler/tfl-inspect/CMakeLists.txt
+++ b/compiler/tfl-inspect/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_tflite)
+if(NOT TARGET mio_tflite280)
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite280)
set(DRIVER "driver/Driver.cpp")
diff --git a/compiler/tfl-inspect/driver/Driver.cpp b/compiler/tfl-inspect/driver/Driver.cpp
index 3e62e0ffb..8505ff4aa 100644
--- a/compiler/tfl-inspect/driver/Driver.cpp
+++ b/compiler/tfl-inspect/driver/Driver.cpp
@@ -35,7 +35,7 @@ int entry(int argc, char **argv)
.nargs(0)
.help("Dump Conv2D series weight operators in tflite file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file");
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect");
+ arser.add_argument("tflite").help("TFLite file to inspect");
try
{
diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt
index 2fba335ea..5bead5bb4 100644
--- a/compiler/tfl-verify/CMakeLists.txt
+++ b/compiler/tfl-verify/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_tflite)
+if(NOT TARGET mio_tflite280)
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite280)
file(GLOB_RECURSE SOURCES "src/*.cpp")
diff --git a/compiler/tfl-verify/src/Driver.cpp b/compiler/tfl-verify/src/Driver.cpp
index 6d1897607..62345494b 100644
--- a/compiler/tfl-verify/src/Driver.cpp
+++ b/compiler/tfl-verify/src/Driver.cpp
@@ -25,7 +25,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify");
+ arser.add_argument("tflite").help("TFLite file path to verify");
try
{
diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt
index 948b1cecd..6205ac650 100644
--- a/compiler/tflchef/CMakeLists.txt
+++ b/compiler/tflchef/CMakeLists.txt
@@ -20,4 +20,9 @@ add_subdirectory(core)
add_subdirectory(tflite)
# Tools
add_subdirectory(tools)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(tests)
diff --git a/compiler/tflchef/core/src/Convert.cpp b/compiler/tflchef/core/src/Convert.cpp
index 200c71eca..f4dd4b332 100644
--- a/compiler/tflchef/core/src/Convert.cpp
+++ b/compiler/tflchef/core/src/Convert.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -62,6 +63,8 @@ tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value)
{
case tflchef::FLOAT32:
return tflite::TensorType_FLOAT32;
+ case tflchef::FLOAT16:
+ return tflite::TensorType_FLOAT16;
case tflchef::INT32:
return tflite::TensorType_INT32;
case tflchef::UINT8:
@@ -164,3 +167,222 @@ as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb,
throw std::runtime_error("Unknown SparseIndexVector type");
}
+
+// namespace sparsity code referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+template <typename T>
+FormatConverter<T>::FormatConverter(const std::vector<int> &shape,
+ const std::vector<int> &traversal_order,
+ const std::vector<TfLiteDimensionType> &format,
+ const std::vector<int> &block_size,
+ const std::vector<int> &block_map)
+ : dense_shape_(shape), traversal_order_(traversal_order), block_size_(block_size),
+ block_map_(block_map)
+{
+ dense_size_ = 1;
+ int block_dim = 0;
+ blocked_shape_.resize(shape.size());
+ format_.resize(shape.size() + block_map.size());
+ for (int i = 0; i < shape.size(); i++)
+ {
+ format_[i] = format[traversal_order[i]];
+ dense_size_ *= shape[i];
+ if (block_dim < block_map.size() && block_map[block_dim] == i)
+ {
+ blocked_shape_[i] = shape[i] / block_size[block_dim];
+ block_dim++;
+ }
+ else
+ {
+ blocked_shape_[i] = shape[i];
+ }
+ }
+
+ // Only dense blocks are supported.
+ for (int i = 0; i < block_map.size(); i++)
+ {
+ format_[i + shape.size()] = kTfLiteDimDense;
+ }
+}
+
+template <typename T> bool FormatConverter<T>::DenseToSparse(const T *src_data)
+{
+ int num_original_dims = dense_shape_.size();
+ int num_block_dims = block_map_.size();
+ int num_expanded_dims = num_original_dims + num_block_dims;
+ std::vector<int> expanded_shape(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; i++)
+ {
+ if (i < num_original_dims)
+ {
+ expanded_shape[i] = blocked_shape_[i];
+ }
+ else
+ {
+ expanded_shape[i] = block_size_[i - num_original_dims];
+ }
+ }
+
+ std::vector<int> shape_offset(num_original_dims);
+ shape_offset[shape_offset.size() - 1] = 1;
+ for (int i = num_original_dims - 1; i > 0; --i)
+ {
+ shape_offset[i - 1] = shape_offset[i] * dense_shape_[i];
+ }
+
+ std::vector<int> expanded_shape_offset(num_expanded_dims);
+ for (int i = 0; i < num_original_dims; ++i)
+ {
+ expanded_shape_offset[i] = shape_offset[i];
+ }
+ for (int i = 0; i < num_block_dims; ++i)
+ {
+ int mapped_dim = block_map_[i];
+ expanded_shape_offset[num_original_dims + i] = shape_offset[mapped_dim];
+ expanded_shape_offset[mapped_dim] *= block_size_[i];
+ }
+
+ std::vector<int> dst_ordered_offset(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dst_ordered_offset[i] = expanded_shape_offset[traversal_order_[i]];
+ }
+
+ std::vector<bool> dst_dim_has_nonzeroes(num_expanded_dims);
+ std::fill(dst_dim_has_nonzeroes.begin(), dst_dim_has_nonzeroes.end(), false);
+ std::vector<int> inner_compressed_dim(num_expanded_dims);
+ int most_recent_compressed_dim = -1;
+ std::vector<int> num_segments_of_next_compressed_dim(num_expanded_dims);
+ int segment_count = 1;
+ for (int i = num_expanded_dims - 1; i >= 0; --i)
+ {
+ inner_compressed_dim[i] = most_recent_compressed_dim;
+ if (format_[i] == kTfLiteDimSparseCSR)
+ {
+ most_recent_compressed_dim = i;
+ num_segments_of_next_compressed_dim[i] = segment_count;
+ segment_count = 1;
+ }
+ else
+ {
+ num_segments_of_next_compressed_dim[i] = -1;
+ segment_count *= expanded_shape[traversal_order_[i]];
+ }
+ }
+
+ dim_metadata_.resize(num_expanded_dims * 2);
+ std::vector<int> dst_sparse_dims;
+ dst_sparse_dims.reserve(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dim_metadata_[i * 2].clear();
+ dim_metadata_[i * 2 + 1].clear();
+ if (format_[i] == kTfLiteDimDense)
+ {
+ // If dimension is dense, just store the shape.
+ dim_metadata_[i * 2].push_back(expanded_shape[traversal_order_[i]]);
+ }
+ else
+ {
+ dim_metadata_[i * 2].push_back(0); // Segment array always begins with 0.
+ dst_sparse_dims.push_back(i); // Add dimension to the sparse list.
+ }
+ }
+
+ // This algorithm assumes that the block size is small enough for all the
+ // elements to fit in cache, so the strided accesses from different traversal
+ // order and the write-first-erase-later strategy shouldn't be too slow
+ int dst_dim_idx = num_expanded_dims;
+ std::vector<int> coordinate(num_expanded_dims, 0);
+ int dense_tensor_idx = 0;
+ while (dst_dim_idx >= 0)
+ {
+ if (dst_dim_idx == num_expanded_dims)
+ {
+ // We have a complete coordinate. Add the element to the value array if it
+ // is not zero, or if the last dimension is dense.
+ if (!IsZero(src_data[dense_tensor_idx]))
+ {
+ data_.push_back(src_data[dense_tensor_idx]);
+ // Mark all sparse dimensions that their current indices have nonzeroes.
+ for (auto dst_dim : dst_sparse_dims)
+ {
+ if (!dst_dim_has_nonzeroes[dst_dim])
+ {
+ // Only add the index to the indices array if the current nonzero
+ // is the first nonzero of the block.
+ dim_metadata_[2 * dst_dim + 1].push_back(coordinate[dst_dim]);
+ dst_dim_has_nonzeroes[dst_dim] = true;
+ }
+ }
+ }
+ else if (format_[num_expanded_dims - 1] == kTfLiteDimDense)
+ {
+ data_.push_back(src_data[dense_tensor_idx]);
+ }
+ --dst_dim_idx;
+ }
+ else
+ {
+ int original_dim_idx = traversal_order_[dst_dim_idx];
+ int dim_size = expanded_shape[original_dim_idx];
+ if (dst_dim_has_nonzeroes[dst_dim_idx])
+ {
+ // If the previous block has nonzeroes, reset the flag to false since
+ // we have just moved to a new block.
+ dst_dim_has_nonzeroes[dst_dim_idx] = false;
+ }
+ else if (format_[dst_dim_idx] == kTfLiteDimSparseCSR)
+ {
+ // This block is empty. Delete unnecessary values if compressed.
+ int next_compressed_dim = inner_compressed_dim[dst_dim_idx];
+ int erase_offset = dim_metadata_[2 * dst_dim_idx + 1].size() *
+ num_segments_of_next_compressed_dim[dst_dim_idx];
+ if (next_compressed_dim >= 0)
+ {
+ auto &segments = dim_metadata_[2 * inner_compressed_dim[dst_dim_idx]];
+ segments.erase(segments.begin() + 1 + erase_offset, segments.end());
+ }
+ else
+ {
+ data_.erase(data_.begin() + erase_offset, data_.end());
+ }
+ }
+ if (++coordinate[dst_dim_idx] < dim_size)
+ {
+ // The current dst_dim_idx is valid (not out of bound).
+ dense_tensor_idx += dst_ordered_offset[dst_dim_idx];
+ ++dst_dim_idx;
+ }
+ else
+ {
+ // dst_dim_idx has reached its dim size. Update segment array and go
+ // back to incrementing the previous dimension (dst_dim_idx - 1).
+ if (format_[dst_dim_idx] == kTfLiteDimSparseCSR)
+ {
+ dim_metadata_[2 * dst_dim_idx].push_back(dim_metadata_[2 * dst_dim_idx + 1].size());
+ }
+ coordinate[dst_dim_idx] = -1;
+ dense_tensor_idx -= dst_ordered_offset[dst_dim_idx] * dim_size;
+ --dst_dim_idx;
+ }
+ }
+ }
+
+ return true;
+}
+
+template <typename T> bool FormatConverter<T>::IsZero(const T val)
+{
+ return (val == static_cast<T>(0));
+}
+
+template class FormatConverter<float>;
+template class FormatConverter<uint16_t>; // float16
+
+} // namespace sparsity
diff --git a/compiler/tflchef/core/src/Convert.h b/compiler/tflchef/core/src/Convert.h
index 45c93d229..6e910ea2c 100644
--- a/compiler/tflchef/core/src/Convert.h
+++ b/compiler/tflchef/core/src/Convert.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -34,4 +35,52 @@ flatbuffers::Offset<void>
as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb,
const ::tflchef::TensorSparsity_IndexVec &value);
+// codes under namespace sparsity referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType
+{
+ kTfLiteDimDense = 0,
+ kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+template <typename T> class FormatConverter
+{
+public:
+ FormatConverter(const std::vector<int32_t> &shape, const std::vector<int32_t> &traversal_order,
+ const std::vector<TfLiteDimensionType> &format,
+ const std::vector<int32_t> &block_size = {},
+ const std::vector<int32_t> &block_map = {});
+
+ bool DenseToSparse(const T *src_data);
+
+ const std::vector<T> &GetData() { return data_; }
+ const std::vector<std::vector<int32_t>> &GetDimMetadata() { return dim_metadata_; }
+
+private:
+ bool IsZero(const T val);
+
+private:
+ std::vector<int32_t> dense_shape_;
+ std::vector<int32_t> blocked_shape_;
+ size_t dense_size_;
+ std::vector<int32_t> traversal_order_;
+ std::vector<TfLiteDimensionType> format_;
+ std::vector<int32_t> block_size_;
+ std::vector<int32_t> block_map_;
+ std::vector<std::vector<int32_t>> dim_metadata_;
+ std::vector<T> data_;
+};
+
+extern template class FormatConverter<float>;
+extern template class FormatConverter<uint16_t>; // float16
+
+} // namespace sparsity
+
#endif // __CONVERT_H__
diff --git a/compiler/tflchef/core/src/DataChef.def b/compiler/tflchef/core/src/DataChef.def
index c634c047e..28a5b7617 100644
--- a/compiler/tflchef/core/src/DataChef.def
+++ b/compiler/tflchef/core/src/DataChef.def
@@ -21,3 +21,7 @@ DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory)
DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
+
+// FLOAT16 support for only gaussian, explicit for now
+DATA_CHEF(FLOAT16, explicit, ExplicitFloat16DataChefFactory)
+DATA_CHEF(FLOAT16, gaussian, GaussianFloat16DataChefFactory)
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index 93b9334a6..a788adc02 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -92,6 +92,7 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
static DataChefRegistry string;
static DataChefRegistry boolean;
static DataChefRegistry s16;
+ static DataChefRegistry fp16;
switch (type)
{
@@ -101,6 +102,8 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
return s64;
case tflchef::FLOAT32:
return fp32;
+ case tflchef::FLOAT16:
+ return fp16;
case tflchef::UINT8:
return u8;
case tflchef::STRING:
@@ -207,6 +210,41 @@ struct CookParams
std::string noname;
};
+std::vector<flatbuffers::Offset<tflite::DimensionMetadata>>
+make_dim_metadata_vec(flatbuffers::FlatBufferBuilder *flatbuffer_builder, int32_t dims_count,
+ const std::vector<int> &traversal_order_vec,
+ const std::vector<sparsity::TfLiteDimensionType> &format_vec,
+ const std::vector<std::vector<int32_t>> &dim_metadata_src)
+{
+ // Build sparsity parameter.
+ std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> dim_metadata_vec(dims_count);
+ for (int32_t i = 0; i < dims_count; i++)
+ {
+ const int32_t metadata_idx = 2 * i;
+ if (format_vec[traversal_order_vec[i]] == sparsity::kTfLiteDimSparseCSR)
+ {
+ auto array_segments =
+ tflite::CreateInt32Vector(*flatbuffer_builder,
+ flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx]))
+ .Union();
+ auto array_indices =
+ tflite::CreateInt32Vector(
+ *flatbuffer_builder, flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx + 1]))
+ .Union();
+ dim_metadata_vec[i] =
+ tflite::CreateDimensionMetadata(*flatbuffer_builder, tflite::DimensionType_SPARSE_CSR, 0,
+ tflite::SparseIndexVector_Int32Vector, array_segments,
+ tflite::SparseIndexVector_Int32Vector, array_indices);
+ }
+ else
+ {
+ dim_metadata_vec[i] = tflite::CreateDimensionMetadata(
+ *flatbuffer_builder, tflite::DimensionType_DENSE, dim_metadata_src[metadata_idx][0]);
+ }
+ }
+ return dim_metadata_vec;
+}
+
template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, CookParams &cp)
{
LOGGER(l);
@@ -271,6 +309,8 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph,
assert(operand.has_type());
+ flatbuffers::Offset<tflite::SparsityParameters> sparsity_index;
+
flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape;
std::vector<int32_t> dims;
if (operand.has_shape())
@@ -298,16 +338,125 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph,
// Create Data
int32_t count = (element_count(dims) > 0) ? element_count(dims) : filler.arg_size();
auto data_vec = chef->generate(count);
- auto data = flatbuffer_builder->CreateVector(data_vec);
- // Create Buffer
- tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
- buffer_builder.add_data(data);
- auto buffer = buffer_builder.Finish();
+ if (operand.has_make_sparse() && operand.make_sparse())
+ {
+ assert(not operand.has_sparsity());
+ assert(operand.has_shape());
+
+ const int32_t dims_count = dims.size();
+ std::vector<int> traversal_order_vec;
+ std::vector<sparsity::TfLiteDimensionType> format_vec;
+ for (int32_t o = 0; o < dims_count; ++o)
+ traversal_order_vec.push_back(o);
+ for (int32_t o = 0; o < dims_count - 1; ++o)
+ format_vec.push_back(sparsity::kTfLiteDimDense);
+ format_vec.push_back(sparsity::kTfLiteDimSparseCSR);
+
+ if (operand.type() == tflchef::FLOAT32)
+ {
+ ::sparsity::FormatConverter<float> converter(dims, traversal_order_vec, format_vec);
+ converter.DenseToSparse(reinterpret_cast<const float *>(data_vec.data()));
+ const auto &sparse_data = converter.GetData();
+
+ std::vector<uint8_t> sparse_uint8;
+ for (int c = 0; c < sparse_data.size(); ++c)
+ {
+ const float value = sparse_data.at(c);
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+ for (uint32_t b = 0; b < sizeof(float); ++b)
+ {
+ sparse_uint8.emplace_back(arr[b]);
+ }
+ }
+ auto data = flatbuffer_builder->CreateVector(sparse_uint8);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+
+ // save SparsityParameters
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ const auto &dim_metadata_src = converter.GetDimMetadata();
+ auto dim_metadata_vec =
+ make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec,
+ format_vec, dim_metadata_src);
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+ else if (operand.type() == tflchef::FLOAT16)
+ {
+ ::sparsity::FormatConverter<uint16_t> converter(dims, traversal_order_vec, format_vec);
+ converter.DenseToSparse(reinterpret_cast<const uint16_t *>(data_vec.data()));
+ const auto &sparse_data = converter.GetData();
+
+ std::vector<uint8_t> sparse_uint8;
+ for (int c = 0; c < sparse_data.size(); ++c)
+ {
+ const uint16_t value = sparse_data.at(c);
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ sparse_uint8.emplace_back(arr[b]);
+ }
+ }
+ auto data = flatbuffer_builder->CreateVector(sparse_uint8);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+
+ // save SparsityParameters
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ const auto &dim_metadata_src = converter.GetDimMetadata();
+ auto dim_metadata_vec =
+ make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec,
+ format_vec, dim_metadata_src);
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: unsupported operand type"};
+ }
+ }
+ else
+ {
+ auto data = flatbuffer_builder->CreateVector(data_vec);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
- // Update Buffer Index & Vector
- buffer_index = buffer_vec.size();
- buffer_vec.emplace_back(buffer);
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+ }
}
else
{
@@ -384,8 +533,6 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph,
quant_index = quant_builder.Finish();
}
- flatbuffers::Offset<tflite::SparsityParameters> sparsity_index;
-
if (operand.has_sparsity())
{
const auto &sparsity = operand.sparsity();
diff --git a/compiler/tflchef/core/src/Op/Densify.cpp b/compiler/tflchef/core/src/Op/Densify.cpp
new file mode 100644
index 000000000..63c4e207a
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Densify.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Densify.h"
+
+flatbuffers::Offset<void> DensifyChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::DensifyOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> DensifyChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new DensifyChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Densify.h b/compiler/tflchef/core/src/Op/Densify.h
new file mode 100644
index 000000000..f6af693d9
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Densify.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DENSIFY_H__
+#define __OP_DENSIFY_H__
+
+#include "OpChef.h"
+
+class DensifyChef final : public OpChef
+{
+public:
+ explicit DensifyChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DENSIFY; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_DensifyOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct DensifyChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_DENSIFY_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
index beebd359f..c19d00dfb 100644
--- a/compiler/tflchef/core/src/OpChef.def
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -18,6 +18,7 @@ OP_CHEF(Ceil, CeilChefFactory)
OP_CHEF(Concatenation, ConcatenationChefFactory)
OP_CHEF(Conv2D, Conv2DChefFactory)
OP_CHEF(Cos, CosChefFactory)
+OP_CHEF(Densify, DensifyChefFactory)
OP_CHEF(DepthToSpace, DepthToSpaceChefFactory)
OP_CHEF(DepthwiseConv2D, DepthwiseConv2DChefFactory)
OP_CHEF(Dequantize, DequantizeChefFactory)
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
index 159019abf..3cd3be558 100644
--- a/compiler/tflchef/core/src/OpChefs.h
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -31,6 +31,7 @@
#include "Op/Concatenation.h"
#include "Op/Conv2D.h"
#include "Op/Cos.h"
+#include "Op/Densify.h"
#include "Op/DepthToSpace.h"
#include "Op/DepthwiseConv2D.h"
#include "Op/Dequantize.h"
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 1abefafe1..da4b6920d 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -15,6 +15,7 @@ package tflchef;
// This enum value corresponds to TensorType in TensorFlow Lite schema
enum TensorType {
FLOAT32 = 0;
+ FLOAT16 = 1;
INT32 = 2;
UINT8 = 3;
INT64 = 4;
@@ -88,6 +89,12 @@ message Operand {
optional TensorSparsity sparsity = 6;
optional bool is_variable = 7 [default = false];
optional ShapeSignature shape_signature = 8;
+ // 'make_sparse' is to tell tflchef to make a sparse tensor
+ // as filling 'TensorSparsity' by hand can be difficult
+ // for now, last dimension will be SPARSE_CSR
+ // ex) shape [2, 3, 4] will have
+ // TraversalOrder [0, 1, 2] with [DENSE, DENSE, SPARSE_CSR]
+ optional bool make_sparse = 9 [default = false];
}
// This enum value corresponds to Padding in TensorFlow Lite schema
@@ -534,6 +541,10 @@ message FakeQuantOptions {
optional bool narrow_range = 4 [default = false];
}
+message DensifyOptions {
+ // NONE
+}
+
message Operation {
optional string type = 1;
repeated string input = 2;
@@ -650,6 +661,7 @@ message Operation {
optional AddNOptions add_n_options = 207;
optional MatMulOptions matmul_options = 208;
optional MaxPoolWithArgmaxOptions max_pool_with_argmax_options = 209;
+ optional DensifyOptions densify_options = 210;
// NOTE if there are more than two options with same type of Options
// use the number not listed in the above reserve list
}
diff --git a/compiler/tflchef/tests/make_sparse/test.recipe b/compiler/tflchef/tests/make_sparse/test.recipe
new file mode 100644
index 000000000..15cc93a5d
--- /dev/null
+++ b/compiler/tflchef/tests/make_sparse/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "3"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/compiler/tflchef/tests/make_sparse_f16/test.recipe b/compiler/tflchef/tests/make_sparse_f16/test.recipe
new file mode 100644
index 000000000..5977a1d32
--- /dev/null
+++ b/compiler/tflchef/tests/make_sparse_f16/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "3"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "dense32"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse16"
+ output: "dense16"
+}
+operation {
+ type: "Dequantize"
+ input: "dense16"
+ output: "dense32"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense32"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index 3c3352b0a..d9a20a2e1 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -3,6 +3,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(tflchef_tflite STATIC ${SOURCES})
target_include_directories(tflchef_tflite PUBLIC include)
target_include_directories(tflchef_tflite PRIVATE src)
+target_include_directories(tflchef_tflite PRIVATE src/Op/include)
target_link_libraries(tflchef_tflite tflchef_proto)
target_link_libraries(tflchef_tflite mio_tflite280)
target_link_libraries(tflchef_tflite mio_tflite280_helper)
diff --git a/compiler/tflchef/tflite/src/Convert.cpp b/compiler/tflchef/tflite/src/Convert.cpp
index f47e51d3d..242987661 100644
--- a/compiler/tflchef/tflite/src/Convert.cpp
+++ b/compiler/tflchef/tflite/src/Convert.cpp
@@ -35,8 +35,9 @@ tflchef::TensorType as_tflchef_type(const tflite::TensorType type)
return tflchef::BOOL;
case tflite::TensorType_INT16:
return tflchef::INT16;
+ case tflite::TensorType_FLOAT16:
+ return tflchef::FLOAT16;
// TODO handle other types
- // TensorType_FLOAT16
// TensorType_STRING
// TensorType_COMPLEX64
default:
diff --git a/compiler/tflchef/tflite/src/FillerHelper.cpp b/compiler/tflchef/tflite/src/FillerHelper.cpp
index cf96d2e8c..1ac99ad40 100644
--- a/compiler/tflchef/tflite/src/FillerHelper.cpp
+++ b/compiler/tflchef/tflite/src/FillerHelper.cpp
@@ -48,3 +48,18 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import)
}
} // namespace tflchef
+
+// helpers of common code for filling inputs
+namespace tflchef
+{
+
+void fill_two_inputs(const tflite::Operator *op, TFliteImport *import)
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/FillerHelper.h b/compiler/tflchef/tflite/src/FillerHelper.h
index 053a5c18a..e96ae73d0 100644
--- a/compiler/tflchef/tflite/src/FillerHelper.h
+++ b/compiler/tflchef/tflite/src/FillerHelper.h
@@ -28,4 +28,12 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import);
} // namespace tflchef
+// helpers of common code for filling inputs
+namespace tflchef
+{
+
+void fill_two_inputs(const tflite::Operator *op, TFliteImport *import);
+
+} // namespace tflchef
+
#endif // __FILLER_HELPER_H__
diff --git a/compiler/tflchef/tflite/src/Op/Add.cpp b/compiler/tflchef/tflite/src/Op/Add.cpp
index 3e880a63b..23d360616 100644
--- a/compiler/tflchef/tflite/src/Op/Add.cpp
+++ b/compiler/tflchef/tflite/src/Op/Add.cpp
@@ -27,11 +27,7 @@ void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import,
{
// Add may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpAdd::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Maximum.cpp b/compiler/tflchef/tflite/src/Op/Maximum.cpp
index d52caf0c2..65e4c2c99 100644
--- a/compiler/tflchef/tflite/src/Op/Maximum.cpp
+++ b/compiler/tflchef/tflite/src/Op/Maximum.cpp
@@ -25,11 +25,7 @@ namespace tflchef
void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Minimum.cpp b/compiler/tflchef/tflite/src/Op/Minimum.cpp
index 6440f1deb..b4d255ce3 100644
--- a/compiler/tflchef/tflite/src/Op/Minimum.cpp
+++ b/compiler/tflchef/tflite/src/Op/Minimum.cpp
@@ -25,11 +25,7 @@ namespace tflchef
void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Mul.cpp b/compiler/tflchef/tflite/src/Op/Mul.cpp
index 9faa4acaf..1145ff7e6 100644
--- a/compiler/tflchef/tflite/src/Op/Mul.cpp
+++ b/compiler/tflchef/tflite/src/Op/Mul.cpp
@@ -27,11 +27,7 @@ void TFliteOpMul::filler(const tflite::Operator *op, TFliteImport *import,
{
// Mul may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMul::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
index ad9921970..4f096ced4 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
@@ -38,7 +38,7 @@ void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImpor
for (int32_t index = 2; index < 5; ++index)
{
- fill_tensor_to_import(index, import);
+ fill_tensor_to_import(inputs[index], import);
}
}
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
index db7f4c932..332cba0ff 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
@@ -41,7 +41,7 @@ void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImpor
for (int32_t index = 2; index < 6; ++index)
{
- fill_tensor_to_import(index, import);
+ fill_tensor_to_import(inputs[index], import);
}
}
diff --git a/compiler/tflchef/tflite/src/Op/PadV2.cpp b/compiler/tflchef/tflite/src/Op/PadV2.cpp
index 0b1c9f3b2..a6b657f59 100644
--- a/compiler/tflchef/tflite/src/Op/PadV2.cpp
+++ b/compiler/tflchef/tflite/src/Op/PadV2.cpp
@@ -16,6 +16,7 @@
#include "PadV2.h"
+#include "Convert.h"
#include "FillerHelper.h"
namespace tflchef
@@ -24,9 +25,11 @@ namespace tflchef
void TFliteOpPadV2::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
// Filler for paddings and constant_values
- fill_tensor_to_import(1, import);
- fill_tensor_to_import(2, import);
+ fill_tensor_to_import(inputs[1], import);
+ fill_tensor_to_import(inputs[2], import);
}
tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
index 548a09a67..ec09a69a4 100644
--- a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
+++ b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
@@ -25,9 +25,11 @@ namespace tflchef
void TFliteOpScatterNd::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
// Filler for indices and shape
- fill_tensor_to_import(0, import);
- fill_tensor_to_import(2, import);
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[2], import);
}
tflchef::Operation *TFliteOpScatterNd::build(const tflite::Operator *, TFliteImport *,
diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
index a975ca4b3..bc45a94e0 100644
--- a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
+++ b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
@@ -16,6 +16,7 @@
#include "SegmentSum.h"
+#include "Convert.h"
#include "FillerHelper.h"
namespace tflchef
@@ -24,8 +25,10 @@ namespace tflchef
void TFliteOpSegmentSum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Filler for indices and shape
- fill_tensor_to_import(1, import);
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ // Filler for segment_ids
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpSegmentSum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Sub.cpp b/compiler/tflchef/tflite/src/Op/Sub.cpp
index 0a08bbfdf..584be0ab9 100644
--- a/compiler/tflchef/tflite/src/Op/Sub.cpp
+++ b/compiler/tflchef/tflite/src/Op/Sub.cpp
@@ -27,11 +27,7 @@ void TFliteOpSub::filler(const tflite::Operator *op, TFliteImport *import,
{
// Sub may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpSub::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Abs.h b/compiler/tflchef/tflite/src/Op/include/Abs.h
index d99b0d593..d99b0d593 100644
--- a/compiler/tflchef/tflite/src/Op/Abs.h
+++ b/compiler/tflchef/tflite/src/Op/include/Abs.h
diff --git a/compiler/tflchef/tflite/src/Op/Add.h b/compiler/tflchef/tflite/src/Op/include/Add.h
index 49d945f8b..49d945f8b 100644
--- a/compiler/tflchef/tflite/src/Op/Add.h
+++ b/compiler/tflchef/tflite/src/Op/include/Add.h
diff --git a/compiler/tflchef/tflite/src/Op/AddN.h b/compiler/tflchef/tflite/src/Op/include/AddN.h
index 4387aa06a..4387aa06a 100644
--- a/compiler/tflchef/tflite/src/Op/AddN.h
+++ b/compiler/tflchef/tflite/src/Op/include/AddN.h
diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.h b/compiler/tflchef/tflite/src/Op/include/ArgMax.h
index 30068ecf2..30068ecf2 100644
--- a/compiler/tflchef/tflite/src/Op/ArgMax.h
+++ b/compiler/tflchef/tflite/src/Op/include/ArgMax.h
diff --git a/compiler/tflchef/tflite/src/Op/ArgMin.h b/compiler/tflchef/tflite/src/Op/include/ArgMin.h
index 83c643c1a..83c643c1a 100644
--- a/compiler/tflchef/tflite/src/Op/ArgMin.h
+++ b/compiler/tflchef/tflite/src/Op/include/ArgMin.h
diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.h b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h
index f9e9fb254..f9e9fb254 100644
--- a/compiler/tflchef/tflite/src/Op/AveragePool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/BatchMatMul.h b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h
index 6eb4c6e68..6eb4c6e68 100644
--- a/compiler/tflchef/tflite/src/Op/BatchMatMul.h
+++ b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h
diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h
index ae2114c97..ae2114c97 100644
--- a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h
+++ b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h
diff --git a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h
index 333f542ac..333f542ac 100644
--- a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h
+++ b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h
diff --git a/compiler/tflchef/tflite/src/Op/Cast.h b/compiler/tflchef/tflite/src/Op/include/Cast.h
index 29c126c93..29c126c93 100644
--- a/compiler/tflchef/tflite/src/Op/Cast.h
+++ b/compiler/tflchef/tflite/src/Op/include/Cast.h
diff --git a/compiler/tflchef/tflite/src/Op/Ceil.h b/compiler/tflchef/tflite/src/Op/include/Ceil.h
index 44df20778..44df20778 100644
--- a/compiler/tflchef/tflite/src/Op/Ceil.h
+++ b/compiler/tflchef/tflite/src/Op/include/Ceil.h
diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.h b/compiler/tflchef/tflite/src/Op/include/Concatenation.h
index 4a7ea5791..4a7ea5791 100644
--- a/compiler/tflchef/tflite/src/Op/Concatenation.h
+++ b/compiler/tflchef/tflite/src/Op/include/Concatenation.h
diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.h b/compiler/tflchef/tflite/src/Op/include/Conv2D.h
index 0216e9ce9..0216e9ce9 100644
--- a/compiler/tflchef/tflite/src/Op/Conv2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/Conv2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Cos.h b/compiler/tflchef/tflite/src/Op/include/Cos.h
index 8f3dbe3a6..8f3dbe3a6 100644
--- a/compiler/tflchef/tflite/src/Op/Cos.h
+++ b/compiler/tflchef/tflite/src/Op/include/Cos.h
diff --git a/compiler/tflchef/tflite/src/Op/DepthToSpace.h b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h
index b5852ac89..b5852ac89 100644
--- a/compiler/tflchef/tflite/src/Op/DepthToSpace.h
+++ b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h
diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h
index c172536b4..c172536b4 100644
--- a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Dequantize.h b/compiler/tflchef/tflite/src/Op/include/Dequantize.h
index df1c7bbdb..df1c7bbdb 100644
--- a/compiler/tflchef/tflite/src/Op/Dequantize.h
+++ b/compiler/tflchef/tflite/src/Op/include/Dequantize.h
diff --git a/compiler/tflchef/tflite/src/Op/Div.h b/compiler/tflchef/tflite/src/Op/include/Div.h
index 254a4cd99..254a4cd99 100644
--- a/compiler/tflchef/tflite/src/Op/Div.h
+++ b/compiler/tflchef/tflite/src/Op/include/Div.h
diff --git a/compiler/tflchef/tflite/src/Op/ELU.h b/compiler/tflchef/tflite/src/Op/include/ELU.h
index 490c9fde4..490c9fde4 100644
--- a/compiler/tflchef/tflite/src/Op/ELU.h
+++ b/compiler/tflchef/tflite/src/Op/include/ELU.h
diff --git a/compiler/tflchef/tflite/src/Op/Equal.h b/compiler/tflchef/tflite/src/Op/include/Equal.h
index fd4b40001..fd4b40001 100644
--- a/compiler/tflchef/tflite/src/Op/Equal.h
+++ b/compiler/tflchef/tflite/src/Op/include/Equal.h
diff --git a/compiler/tflchef/tflite/src/Op/Exp.h b/compiler/tflchef/tflite/src/Op/include/Exp.h
index 5ff3ddc8b..5ff3ddc8b 100644
--- a/compiler/tflchef/tflite/src/Op/Exp.h
+++ b/compiler/tflchef/tflite/src/Op/include/Exp.h
diff --git a/compiler/tflchef/tflite/src/Op/ExpandDims.h b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h
index e2f3e4e50..e2f3e4e50 100644
--- a/compiler/tflchef/tflite/src/Op/ExpandDims.h
+++ b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h
diff --git a/compiler/tflchef/tflite/src/Op/FakeQuant.h b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h
index f36e615df..f36e615df 100644
--- a/compiler/tflchef/tflite/src/Op/FakeQuant.h
+++ b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h
diff --git a/compiler/tflchef/tflite/src/Op/Fill.h b/compiler/tflchef/tflite/src/Op/include/Fill.h
index 4f46f628a..4f46f628a 100644
--- a/compiler/tflchef/tflite/src/Op/Fill.h
+++ b/compiler/tflchef/tflite/src/Op/include/Fill.h
diff --git a/compiler/tflchef/tflite/src/Op/Floor.h b/compiler/tflchef/tflite/src/Op/include/Floor.h
index f0f8ef38a..f0f8ef38a 100644
--- a/compiler/tflchef/tflite/src/Op/Floor.h
+++ b/compiler/tflchef/tflite/src/Op/include/Floor.h
diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.h b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h
index 5d049a668..5d049a668 100644
--- a/compiler/tflchef/tflite/src/Op/FloorDiv.h
+++ b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h
diff --git a/compiler/tflchef/tflite/src/Op/FloorMod.h b/compiler/tflchef/tflite/src/Op/include/FloorMod.h
index f36dfe813..f36dfe813 100644
--- a/compiler/tflchef/tflite/src/Op/FloorMod.h
+++ b/compiler/tflchef/tflite/src/Op/include/FloorMod.h
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.h b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h
index 8fbe1f3ed..8fbe1f3ed 100644
--- a/compiler/tflchef/tflite/src/Op/FullyConnected.h
+++ b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h
diff --git a/compiler/tflchef/tflite/src/Op/Gather.h b/compiler/tflchef/tflite/src/Op/include/Gather.h
index e01276b76..e01276b76 100644
--- a/compiler/tflchef/tflite/src/Op/Gather.h
+++ b/compiler/tflchef/tflite/src/Op/include/Gather.h
diff --git a/compiler/tflchef/tflite/src/Op/GatherNd.h b/compiler/tflchef/tflite/src/Op/include/GatherNd.h
index 112f23d33..112f23d33 100644
--- a/compiler/tflchef/tflite/src/Op/GatherNd.h
+++ b/compiler/tflchef/tflite/src/Op/include/GatherNd.h
diff --git a/compiler/tflchef/tflite/src/Op/Greater.h b/compiler/tflchef/tflite/src/Op/include/Greater.h
index 3ab2d1a4e..3ab2d1a4e 100644
--- a/compiler/tflchef/tflite/src/Op/Greater.h
+++ b/compiler/tflchef/tflite/src/Op/include/Greater.h
diff --git a/compiler/tflchef/tflite/src/Op/GreaterEqual.h b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h
index 96b0af78a..96b0af78a 100644
--- a/compiler/tflchef/tflite/src/Op/GreaterEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/L2Normalize.h b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h
index a73eae6c8..a73eae6c8 100644
--- a/compiler/tflchef/tflite/src/Op/L2Normalize.h
+++ b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h
diff --git a/compiler/tflchef/tflite/src/Op/L2Pool2D.h b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h
index 046353440..046353440 100644
--- a/compiler/tflchef/tflite/src/Op/L2Pool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/LeakyRelu.h b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h
index 28e63e0ca..28e63e0ca 100644
--- a/compiler/tflchef/tflite/src/Op/LeakyRelu.h
+++ b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h
diff --git a/compiler/tflchef/tflite/src/Op/Less.h b/compiler/tflchef/tflite/src/Op/include/Less.h
index 1316cb613..1316cb613 100644
--- a/compiler/tflchef/tflite/src/Op/Less.h
+++ b/compiler/tflchef/tflite/src/Op/include/Less.h
diff --git a/compiler/tflchef/tflite/src/Op/LessEqual.h b/compiler/tflchef/tflite/src/Op/include/LessEqual.h
index 81c710fbc..81c710fbc 100644
--- a/compiler/tflchef/tflite/src/Op/LessEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/LessEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h
index c0eb3f2b1..c0eb3f2b1 100644
--- a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h
+++ b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h
diff --git a/compiler/tflchef/tflite/src/Op/Log.h b/compiler/tflchef/tflite/src/Op/include/Log.h
index 9d17e2f81..9d17e2f81 100644
--- a/compiler/tflchef/tflite/src/Op/Log.h
+++ b/compiler/tflchef/tflite/src/Op/include/Log.h
diff --git a/compiler/tflchef/tflite/src/Op/LogSoftmax.h b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h
index efd81f3e9..efd81f3e9 100644
--- a/compiler/tflchef/tflite/src/Op/LogSoftmax.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalAnd.h b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h
index 1f7a964b9..1f7a964b9 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalAnd.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.h b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h
index b75d33554..b75d33554 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalNot.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.h b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h
index 5331a0d65..5331a0d65 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalOr.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h
diff --git a/compiler/tflchef/tflite/src/Op/Logistic.h b/compiler/tflchef/tflite/src/Op/include/Logistic.h
index a75bf490e..a75bf490e 100644
--- a/compiler/tflchef/tflite/src/Op/Logistic.h
+++ b/compiler/tflchef/tflite/src/Op/include/Logistic.h
diff --git a/compiler/tflchef/tflite/src/Op/MatrixDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h
index 4074f2c36..4074f2c36 100644
--- a/compiler/tflchef/tflite/src/Op/MatrixDiag.h
+++ b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h
diff --git a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h
index 0e7ec7f32..0e7ec7f32 100644
--- a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h
+++ b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h
diff --git a/compiler/tflchef/tflite/src/Op/MaxPool2D.h b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h
index 36533f80c..36533f80c 100644
--- a/compiler/tflchef/tflite/src/Op/MaxPool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Maximum.h b/compiler/tflchef/tflite/src/Op/include/Maximum.h
index acafec343..acafec343 100644
--- a/compiler/tflchef/tflite/src/Op/Maximum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Maximum.h
diff --git a/compiler/tflchef/tflite/src/Op/Mean.h b/compiler/tflchef/tflite/src/Op/include/Mean.h
index 532c40c66..532c40c66 100644
--- a/compiler/tflchef/tflite/src/Op/Mean.h
+++ b/compiler/tflchef/tflite/src/Op/include/Mean.h
diff --git a/compiler/tflchef/tflite/src/Op/Minimum.h b/compiler/tflchef/tflite/src/Op/include/Minimum.h
index 5db5b7940..5db5b7940 100644
--- a/compiler/tflchef/tflite/src/Op/Minimum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Minimum.h
diff --git a/compiler/tflchef/tflite/src/Op/MirrorPad.h b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h
index c9acdd498..c9acdd498 100644
--- a/compiler/tflchef/tflite/src/Op/MirrorPad.h
+++ b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h
diff --git a/compiler/tflchef/tflite/src/Op/Mul.h b/compiler/tflchef/tflite/src/Op/include/Mul.h
index fd009d2fd..fd009d2fd 100644
--- a/compiler/tflchef/tflite/src/Op/Mul.h
+++ b/compiler/tflchef/tflite/src/Op/include/Mul.h
diff --git a/compiler/tflchef/tflite/src/Op/Neg.h b/compiler/tflchef/tflite/src/Op/include/Neg.h
index c77ab7e84..c77ab7e84 100644
--- a/compiler/tflchef/tflite/src/Op/Neg.h
+++ b/compiler/tflchef/tflite/src/Op/include/Neg.h
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h
index 114a2ad2f..114a2ad2f 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h
+++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h
index c948043f4..c948043f4 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
+++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h
diff --git a/compiler/tflchef/tflite/src/Op/NotEqual.h b/compiler/tflchef/tflite/src/Op/include/NotEqual.h
index b1febdcc5..b1febdcc5 100644
--- a/compiler/tflchef/tflite/src/Op/NotEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/NotEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/OneHot.h b/compiler/tflchef/tflite/src/Op/include/OneHot.h
index 50bbed095..50bbed095 100644
--- a/compiler/tflchef/tflite/src/Op/OneHot.h
+++ b/compiler/tflchef/tflite/src/Op/include/OneHot.h
diff --git a/compiler/tflchef/tflite/src/Op/PRelu.h b/compiler/tflchef/tflite/src/Op/include/PRelu.h
index b35c6e7ce..b35c6e7ce 100644
--- a/compiler/tflchef/tflite/src/Op/PRelu.h
+++ b/compiler/tflchef/tflite/src/Op/include/PRelu.h
diff --git a/compiler/tflchef/tflite/src/Op/Pack.h b/compiler/tflchef/tflite/src/Op/include/Pack.h
index 7779f64ed..7779f64ed 100644
--- a/compiler/tflchef/tflite/src/Op/Pack.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pack.h
diff --git a/compiler/tflchef/tflite/src/Op/Pad.h b/compiler/tflchef/tflite/src/Op/include/Pad.h
index 99998d418..99998d418 100644
--- a/compiler/tflchef/tflite/src/Op/Pad.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pad.h
diff --git a/compiler/tflchef/tflite/src/Op/PadV2.h b/compiler/tflchef/tflite/src/Op/include/PadV2.h
index 3aa474b92..3aa474b92 100644
--- a/compiler/tflchef/tflite/src/Op/PadV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/PadV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Pow.h b/compiler/tflchef/tflite/src/Op/include/Pow.h
index 20e847377..20e847377 100644
--- a/compiler/tflchef/tflite/src/Op/Pow.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pow.h
diff --git a/compiler/tflchef/tflite/src/Op/Quantize.h b/compiler/tflchef/tflite/src/Op/include/Quantize.h
index 256ed5a5c..256ed5a5c 100644
--- a/compiler/tflchef/tflite/src/Op/Quantize.h
+++ b/compiler/tflchef/tflite/src/Op/include/Quantize.h
diff --git a/compiler/tflchef/tflite/src/Op/Range.h b/compiler/tflchef/tflite/src/Op/include/Range.h
index ad10dc58b..ad10dc58b 100644
--- a/compiler/tflchef/tflite/src/Op/Range.h
+++ b/compiler/tflchef/tflite/src/Op/include/Range.h
diff --git a/compiler/tflchef/tflite/src/Op/Rank.h b/compiler/tflchef/tflite/src/Op/include/Rank.h
index 003d9d310..003d9d310 100644
--- a/compiler/tflchef/tflite/src/Op/Rank.h
+++ b/compiler/tflchef/tflite/src/Op/include/Rank.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLU.h b/compiler/tflchef/tflite/src/Op/include/ReLU.h
index be1090270..be1090270 100644
--- a/compiler/tflchef/tflite/src/Op/ReLU.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLU.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.h b/compiler/tflchef/tflite/src/Op/include/ReLU6.h
index 64ddb6a2e..64ddb6a2e 100644
--- a/compiler/tflchef/tflite/src/Op/ReLU6.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLU6.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h
index 0767006af..0767006af 100644
--- a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceAny.h b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h
index dd5e361d5..dd5e361d5 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceAny.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceMax.h b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h
index 8e65cf47c..8e65cf47c 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceMax.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceMin.h b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h
index 88cba6fe7..88cba6fe7 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceMin.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceProd.h b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h
index e7766840a..e7766840a 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceProd.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h
diff --git a/compiler/tflchef/tflite/src/Op/Reshape.h b/compiler/tflchef/tflite/src/Op/include/Reshape.h
index be9fdac08..be9fdac08 100644
--- a/compiler/tflchef/tflite/src/Op/Reshape.h
+++ b/compiler/tflchef/tflite/src/Op/include/Reshape.h
diff --git a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h
index 98c49c534..98c49c534 100644
--- a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h
+++ b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h
diff --git a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h
index 5090bb938..5090bb938 100644
--- a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h
+++ b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h
diff --git a/compiler/tflchef/tflite/src/Op/ReverseSequence.h b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h
index 8c8c811e4..8c8c811e4 100644
--- a/compiler/tflchef/tflite/src/Op/ReverseSequence.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h
diff --git a/compiler/tflchef/tflite/src/Op/ReverseV2.h b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h
index 6a8a75e6b..6a8a75e6b 100644
--- a/compiler/tflchef/tflite/src/Op/ReverseV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Round.h b/compiler/tflchef/tflite/src/Op/include/Round.h
index df0da3fa1..df0da3fa1 100644
--- a/compiler/tflchef/tflite/src/Op/Round.h
+++ b/compiler/tflchef/tflite/src/Op/include/Round.h
diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.h b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h
index 5d68344c2..5d68344c2 100644
--- a/compiler/tflchef/tflite/src/Op/Rsqrt.h
+++ b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/include/SVDF.h
index a59ca54a2..a59ca54a2 100644
--- a/compiler/tflchef/tflite/src/Op/SVDF.h
+++ b/compiler/tflchef/tflite/src/Op/include/SVDF.h
diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.h b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h
index 76362d775..76362d775 100644
--- a/compiler/tflchef/tflite/src/Op/ScatterNd.h
+++ b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h
diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.h b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h
index d20e63bd7..d20e63bd7 100644
--- a/compiler/tflchef/tflite/src/Op/SegmentSum.h
+++ b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h
diff --git a/compiler/tflchef/tflite/src/Op/Select.h b/compiler/tflchef/tflite/src/Op/include/Select.h
index bf8e57d78..bf8e57d78 100644
--- a/compiler/tflchef/tflite/src/Op/Select.h
+++ b/compiler/tflchef/tflite/src/Op/include/Select.h
diff --git a/compiler/tflchef/tflite/src/Op/SelectV2.h b/compiler/tflchef/tflite/src/Op/include/SelectV2.h
index ff03341d7..ff03341d7 100644
--- a/compiler/tflchef/tflite/src/Op/SelectV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/SelectV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Shape.h b/compiler/tflchef/tflite/src/Op/include/Shape.h
index ebe1befb3..ebe1befb3 100644
--- a/compiler/tflchef/tflite/src/Op/Shape.h
+++ b/compiler/tflchef/tflite/src/Op/include/Shape.h
diff --git a/compiler/tflchef/tflite/src/Op/Sin.h b/compiler/tflchef/tflite/src/Op/include/Sin.h
index 51eabceb5..51eabceb5 100644
--- a/compiler/tflchef/tflite/src/Op/Sin.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sin.h
diff --git a/compiler/tflchef/tflite/src/Op/Slice.h b/compiler/tflchef/tflite/src/Op/include/Slice.h
index 6ca6724d3..6ca6724d3 100644
--- a/compiler/tflchef/tflite/src/Op/Slice.h
+++ b/compiler/tflchef/tflite/src/Op/include/Slice.h
diff --git a/compiler/tflchef/tflite/src/Op/Softmax.h b/compiler/tflchef/tflite/src/Op/include/Softmax.h
index cf168bdd9..cf168bdd9 100644
--- a/compiler/tflchef/tflite/src/Op/Softmax.h
+++ b/compiler/tflchef/tflite/src/Op/include/Softmax.h
diff --git a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h
index 9d7bc44e8..9d7bc44e8 100644
--- a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h
+++ b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h
diff --git a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h
index 784ad940a..784ad940a 100644
--- a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h
+++ b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h
diff --git a/compiler/tflchef/tflite/src/Op/SparseToDense.h b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h
index 5ffe4789d..5ffe4789d 100644
--- a/compiler/tflchef/tflite/src/Op/SparseToDense.h
+++ b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h
diff --git a/compiler/tflchef/tflite/src/Op/Split.h b/compiler/tflchef/tflite/src/Op/include/Split.h
index af247a1b9..af247a1b9 100644
--- a/compiler/tflchef/tflite/src/Op/Split.h
+++ b/compiler/tflchef/tflite/src/Op/include/Split.h
diff --git a/compiler/tflchef/tflite/src/Op/SplitV.h b/compiler/tflchef/tflite/src/Op/include/SplitV.h
index 3f715b5f9..3f715b5f9 100644
--- a/compiler/tflchef/tflite/src/Op/SplitV.h
+++ b/compiler/tflchef/tflite/src/Op/include/SplitV.h
diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.h b/compiler/tflchef/tflite/src/Op/include/Sqrt.h
index 9f0ad04ae..9f0ad04ae 100644
--- a/compiler/tflchef/tflite/src/Op/Sqrt.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sqrt.h
diff --git a/compiler/tflchef/tflite/src/Op/Square.h b/compiler/tflchef/tflite/src/Op/include/Square.h
index 9c008fe52..9c008fe52 100644
--- a/compiler/tflchef/tflite/src/Op/Square.h
+++ b/compiler/tflchef/tflite/src/Op/include/Square.h
diff --git a/compiler/tflchef/tflite/src/Op/SquaredDifference.h b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h
index 58c2ed460..58c2ed460 100644
--- a/compiler/tflchef/tflite/src/Op/SquaredDifference.h
+++ b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h
diff --git a/compiler/tflchef/tflite/src/Op/Squeeze.h b/compiler/tflchef/tflite/src/Op/include/Squeeze.h
index b6c89f73d..b6c89f73d 100644
--- a/compiler/tflchef/tflite/src/Op/Squeeze.h
+++ b/compiler/tflchef/tflite/src/Op/include/Squeeze.h
diff --git a/compiler/tflchef/tflite/src/Op/StridedSlice.h b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h
index 98054b9b9..98054b9b9 100644
--- a/compiler/tflchef/tflite/src/Op/StridedSlice.h
+++ b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h
diff --git a/compiler/tflchef/tflite/src/Op/Sub.h b/compiler/tflchef/tflite/src/Op/include/Sub.h
index 2168e5e0d..2168e5e0d 100644
--- a/compiler/tflchef/tflite/src/Op/Sub.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sub.h
diff --git a/compiler/tflchef/tflite/src/Op/Sum.h b/compiler/tflchef/tflite/src/Op/include/Sum.h
index 38eeb080d..38eeb080d 100644
--- a/compiler/tflchef/tflite/src/Op/Sum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sum.h
diff --git a/compiler/tflchef/tflite/src/Op/Tanh.h b/compiler/tflchef/tflite/src/Op/include/Tanh.h
index 7339e4103..7339e4103 100644
--- a/compiler/tflchef/tflite/src/Op/Tanh.h
+++ b/compiler/tflchef/tflite/src/Op/include/Tanh.h
diff --git a/compiler/tflchef/tflite/src/Op/Tile.h b/compiler/tflchef/tflite/src/Op/include/Tile.h
index 640f52a1f..640f52a1f 100644
--- a/compiler/tflchef/tflite/src/Op/Tile.h
+++ b/compiler/tflchef/tflite/src/Op/include/Tile.h
diff --git a/compiler/tflchef/tflite/src/Op/TopKV2.h b/compiler/tflchef/tflite/src/Op/include/TopKV2.h
index b2b74cc75..b2b74cc75 100644
--- a/compiler/tflchef/tflite/src/Op/TopKV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/TopKV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Transpose.h b/compiler/tflchef/tflite/src/Op/include/Transpose.h
index f0d944b6b..f0d944b6b 100644
--- a/compiler/tflchef/tflite/src/Op/Transpose.h
+++ b/compiler/tflchef/tflite/src/Op/include/Transpose.h
diff --git a/compiler/tflchef/tflite/src/Op/TransposeConv.h b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h
index c79cdabd2..c79cdabd2 100644
--- a/compiler/tflchef/tflite/src/Op/TransposeConv.h
+++ b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h
diff --git a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h
index cc4e5fb0f..cc4e5fb0f 100644
--- a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h
+++ b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h
diff --git a/compiler/tflchef/tflite/src/Op/Unique.h b/compiler/tflchef/tflite/src/Op/include/Unique.h
index fae037c9f..fae037c9f 100644
--- a/compiler/tflchef/tflite/src/Op/Unique.h
+++ b/compiler/tflchef/tflite/src/Op/include/Unique.h
diff --git a/compiler/tflchef/tflite/src/Op/Unpack.h b/compiler/tflchef/tflite/src/Op/include/Unpack.h
index 1036bdc14..1036bdc14 100644
--- a/compiler/tflchef/tflite/src/Op/Unpack.h
+++ b/compiler/tflchef/tflite/src/Op/include/Unpack.h
diff --git a/compiler/tflchef/tflite/src/Op/Where.h b/compiler/tflchef/tflite/src/Op/include/Where.h
index 00cdc4b00..00cdc4b00 100644
--- a/compiler/tflchef/tflite/src/Op/Where.h
+++ b/compiler/tflchef/tflite/src/Op/include/Where.h
diff --git a/compiler/tflchef/tflite/src/Op/ZerosLike.h b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h
index 163c1fa21..163c1fa21 100644
--- a/compiler/tflchef/tflite/src/Op/ZerosLike.h
+++ b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
index b38b35a61..1b9d420e5 100644
--- a/compiler/tflchef/tflite/src/TFliteOpChefs.h
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -18,115 +18,115 @@
#define __TFLITE_OP_CHEFS_H__
// In alphabet order
-#include "Op/Abs.h"
-#include "Op/Add.h"
-#include "Op/AddN.h"
-#include "Op/ArgMax.h"
-#include "Op/ArgMin.h"
-#include "Op/AveragePool2D.h"
-#include "Op/BatchMatMul.h"
-#include "Op/BatchToSpaceND.h"
-#include "Op/BidirectionalSequenceLSTM.h"
-#include "Op/Cast.h"
-#include "Op/Ceil.h"
-#include "Op/Concatenation.h"
-#include "Op/Conv2D.h"
-#include "Op/Cos.h"
-#include "Op/DepthToSpace.h"
-#include "Op/DepthwiseConv2D.h"
-#include "Op/Dequantize.h"
-#include "Op/Div.h"
-#include "Op/ELU.h"
-#include "Op/Equal.h"
-#include "Op/Exp.h"
-#include "Op/ExpandDims.h"
-#include "Op/FakeQuant.h"
-#include "Op/Fill.h"
-#include "Op/Floor.h"
-#include "Op/FloorDiv.h"
-#include "Op/FloorMod.h"
-#include "Op/FullyConnected.h"
-#include "Op/Gather.h"
-#include "Op/GatherNd.h"
-#include "Op/Greater.h"
-#include "Op/GreaterEqual.h"
-#include "Op/L2Normalize.h"
-#include "Op/L2Pool2D.h"
-#include "Op/LeakyRelu.h"
-#include "Op/Less.h"
-#include "Op/LessEqual.h"
-#include "Op/LocalResponseNormalization.h"
-#include "Op/Log.h"
-#include "Op/LogicalAnd.h"
-#include "Op/LogicalNot.h"
-#include "Op/LogicalOr.h"
-#include "Op/Logistic.h"
-#include "Op/LogSoftmax.h"
-#include "Op/MatrixDiag.h"
-#include "Op/MatrixSetDiag.h"
-#include "Op/Maximum.h"
-#include "Op/MaxPool2D.h"
-#include "Op/Mean.h"
-#include "Op/Minimum.h"
-#include "Op/MirrorPad.h"
-#include "Op/Mul.h"
-#include "Op/Neg.h"
-#include "Op/NonMaxSuppressionV4.h"
-#include "Op/NonMaxSuppressionV5.h"
-#include "Op/NotEqual.h"
-#include "Op/OneHot.h"
-#include "Op/Pack.h"
-#include "Op/Pad.h"
-#include "Op/PadV2.h"
-#include "Op/Pow.h"
-#include "Op/PRelu.h"
-#include "Op/Quantize.h"
-#include "Op/Range.h"
-#include "Op/Rank.h"
-#include "Op/ReduceAny.h"
-#include "Op/ReduceMax.h"
-#include "Op/ReduceMin.h"
-#include "Op/ReduceProd.h"
-#include "Op/ReLU.h"
-#include "Op/ReLU6.h"
-#include "Op/ReLUN1To1.h"
-#include "Op/Reshape.h"
-#include "Op/ResizeBilinear.h"
-#include "Op/ResizeNearestNeighbor.h"
-#include "Op/ReverseSequence.h"
-#include "Op/ReverseV2.h"
-#include "Op/Round.h"
-#include "Op/Rsqrt.h"
-#include "Op/ScatterNd.h"
-#include "Op/SegmentSum.h"
-#include "Op/Select.h"
-#include "Op/SelectV2.h"
-#include "Op/Shape.h"
-#include "Op/Sin.h"
-#include "Op/Slice.h"
-#include "Op/Softmax.h"
-#include "Op/SpaceToBatchND.h"
-#include "Op/SpaceToDepth.h"
-#include "Op/SparseToDense.h"
-#include "Op/Split.h"
-#include "Op/SplitV.h"
-#include "Op/Sqrt.h"
-#include "Op/Square.h"
-#include "Op/SquaredDifference.h"
-#include "Op/Squeeze.h"
-#include "Op/StridedSlice.h"
-#include "Op/Sub.h"
-#include "Op/Sum.h"
-#include "Op/SVDF.h"
-#include "Op/Tanh.h"
-#include "Op/Tile.h"
-#include "Op/TopKV2.h"
-#include "Op/Transpose.h"
-#include "Op/TransposeConv.h"
-#include "Op/UnidirectionalSequenceLSTM.h"
-#include "Op/Unique.h"
-#include "Op/Unpack.h"
-#include "Op/Where.h"
-#include "Op/ZerosLike.h"
+#include "Op/include/Abs.h"
+#include "Op/include/Add.h"
+#include "Op/include/AddN.h"
+#include "Op/include/ArgMax.h"
+#include "Op/include/ArgMin.h"
+#include "Op/include/AveragePool2D.h"
+#include "Op/include/BatchMatMul.h"
+#include "Op/include/BatchToSpaceND.h"
+#include "Op/include/BidirectionalSequenceLSTM.h"
+#include "Op/include/Cast.h"
+#include "Op/include/Ceil.h"
+#include "Op/include/Concatenation.h"
+#include "Op/include/Conv2D.h"
+#include "Op/include/Cos.h"
+#include "Op/include/DepthToSpace.h"
+#include "Op/include/DepthwiseConv2D.h"
+#include "Op/include/Dequantize.h"
+#include "Op/include/Div.h"
+#include "Op/include/ELU.h"
+#include "Op/include/Equal.h"
+#include "Op/include/Exp.h"
+#include "Op/include/ExpandDims.h"
+#include "Op/include/FakeQuant.h"
+#include "Op/include/Fill.h"
+#include "Op/include/Floor.h"
+#include "Op/include/FloorDiv.h"
+#include "Op/include/FloorMod.h"
+#include "Op/include/FullyConnected.h"
+#include "Op/include/Gather.h"
+#include "Op/include/GatherNd.h"
+#include "Op/include/Greater.h"
+#include "Op/include/GreaterEqual.h"
+#include "Op/include/L2Normalize.h"
+#include "Op/include/L2Pool2D.h"
+#include "Op/include/LeakyRelu.h"
+#include "Op/include/Less.h"
+#include "Op/include/LessEqual.h"
+#include "Op/include/LocalResponseNormalization.h"
+#include "Op/include/Log.h"
+#include "Op/include/LogicalAnd.h"
+#include "Op/include/LogicalNot.h"
+#include "Op/include/LogicalOr.h"
+#include "Op/include/Logistic.h"
+#include "Op/include/LogSoftmax.h"
+#include "Op/include/MatrixDiag.h"
+#include "Op/include/MatrixSetDiag.h"
+#include "Op/include/Maximum.h"
+#include "Op/include/MaxPool2D.h"
+#include "Op/include/Mean.h"
+#include "Op/include/Minimum.h"
+#include "Op/include/MirrorPad.h"
+#include "Op/include/Mul.h"
+#include "Op/include/Neg.h"
+#include "Op/include/NonMaxSuppressionV4.h"
+#include "Op/include/NonMaxSuppressionV5.h"
+#include "Op/include/NotEqual.h"
+#include "Op/include/OneHot.h"
+#include "Op/include/Pack.h"
+#include "Op/include/Pad.h"
+#include "Op/include/PadV2.h"
+#include "Op/include/Pow.h"
+#include "Op/include/PRelu.h"
+#include "Op/include/Quantize.h"
+#include "Op/include/Range.h"
+#include "Op/include/Rank.h"
+#include "Op/include/ReduceAny.h"
+#include "Op/include/ReduceMax.h"
+#include "Op/include/ReduceMin.h"
+#include "Op/include/ReduceProd.h"
+#include "Op/include/ReLU.h"
+#include "Op/include/ReLU6.h"
+#include "Op/include/ReLUN1To1.h"
+#include "Op/include/Reshape.h"
+#include "Op/include/ResizeBilinear.h"
+#include "Op/include/ResizeNearestNeighbor.h"
+#include "Op/include/ReverseSequence.h"
+#include "Op/include/ReverseV2.h"
+#include "Op/include/Round.h"
+#include "Op/include/Rsqrt.h"
+#include "Op/include/ScatterNd.h"
+#include "Op/include/SegmentSum.h"
+#include "Op/include/Select.h"
+#include "Op/include/SelectV2.h"
+#include "Op/include/Shape.h"
+#include "Op/include/Sin.h"
+#include "Op/include/Slice.h"
+#include "Op/include/Softmax.h"
+#include "Op/include/SpaceToBatchND.h"
+#include "Op/include/SpaceToDepth.h"
+#include "Op/include/SparseToDense.h"
+#include "Op/include/Split.h"
+#include "Op/include/SplitV.h"
+#include "Op/include/Sqrt.h"
+#include "Op/include/Square.h"
+#include "Op/include/SquaredDifference.h"
+#include "Op/include/Squeeze.h"
+#include "Op/include/StridedSlice.h"
+#include "Op/include/Sub.h"
+#include "Op/include/Sum.h"
+#include "Op/include/SVDF.h"
+#include "Op/include/Tanh.h"
+#include "Op/include/Tile.h"
+#include "Op/include/TopKV2.h"
+#include "Op/include/Transpose.h"
+#include "Op/include/TransposeConv.h"
+#include "Op/include/UnidirectionalSequenceLSTM.h"
+#include "Op/include/Unique.h"
+#include "Op/include/Unpack.h"
+#include "Op/include/Where.h"
+#include "Op/include/ZerosLike.h"
#endif // __TFLITE_OP_CHEFS_H__
diff --git a/compiler/tflchef/tools/file/Driver.cpp b/compiler/tflchef/tools/file/Driver.cpp
index d4605ced3..f6c6789bd 100644
--- a/compiler/tflchef/tools/file/Driver.cpp
+++ b/compiler/tflchef/tools/file/Driver.cpp
@@ -28,10 +28,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
- arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path");
+ arser.add_argument("recipe").help("Source recipe file path to convert");
+ arser.add_argument("tflite").help("Target tflite file path");
try
{
diff --git a/compiler/tflchef/tools/reverse/Driver.cpp b/compiler/tflchef/tools/reverse/Driver.cpp
index 1451e8bb8..119bee6be 100644
--- a/compiler/tflchef/tools/reverse/Driver.cpp
+++ b/compiler/tflchef/tools/reverse/Driver.cpp
@@ -25,10 +25,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite")
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
- arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
+ arser.add_argument("tflite").help("Source tflite file path to convert");
+ arser.add_argument("recipe").help("Target recipe file path");
try
{
diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt
index fac0be6bf..410232645 100644
--- a/compiler/tfldump/CMakeLists.txt
+++ b/compiler/tfldump/CMakeLists.txt
@@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfldump ${DRIVER} ${SOURCES})
target_include_directories(tfldump PRIVATE include)
target_link_libraries(tfldump arser)
+target_link_libraries(tfldump foder)
target_link_libraries(tfldump mio_tflite280)
target_link_libraries(tfldump mio_tflite280_helper)
target_link_libraries(tfldump safemain)
diff --git a/compiler/tfldump/driver/Driver.cpp b/compiler/tfldump/driver/Driver.cpp
index 38c9c062f..a3e748be1 100644
--- a/compiler/tfldump/driver/Driver.cpp
+++ b/compiler/tfldump/driver/Driver.cpp
@@ -15,7 +15,7 @@
*/
#include <arser/arser.h>
-#include <tflread/Model.h>
+#include <foder/FileLoader.h>
#include <tfldump/Dump.h>
#include <iostream>
@@ -23,7 +23,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to dump");
+ arser.add_argument("tflite").help("TFLite file to dump");
try
{
@@ -38,14 +38,9 @@ int entry(int argc, char **argv)
std::string tflite_path = arser.get<std::string>("tflite");
// Load TF lite model from a tflite file
- std::unique_ptr<tflread::Model> model = tflread::load_tflite(tflite_path);
- if (model == nullptr)
- {
- std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl;
- return 255;
- }
-
- const tflite::Model *tflmodel = model->model();
+ foder::FileLoader fileLoader{tflite_path};
+ std::vector<char> modelData = fileLoader.load();
+ const tflite::Model *tflmodel = tflite::GetModel(modelData.data());
if (tflmodel == nullptr)
{
std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl;
diff --git a/compiler/tfldump/include/tflread/Model.h b/compiler/tfldump/include/tflread/Model.h
deleted file mode 100644
index c6e4a94ac..000000000
--- a/compiler/tfldump/include/tflread/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLREAD_MODEL_H__
-#define __TFLREAD_MODEL_H__
-
-#include <mio/tflite/schema_generated.h>
-
-#include <memory>
-
-namespace tflread
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::tflite::Model *model(void) const = 0;
-};
-
-/**
- * @brief Load TensorFlow Lite model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_tflite(const std::string &path);
-
-} // namespace tflread
-
-#endif // __TFLREAD_MODEL_H__
diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake
index b1abf9486..a11f6b200 100644
--- a/compiler/tfldump/requires.cmake
+++ b/compiler/tfldump/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
+require("foder")
require("mio-tflite280")
require("safemain")
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index 2a87e47d7..4388fcde8 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -33,7 +33,7 @@ void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t am
std::ios_base::fmtflags saveflags(os.flags());
bool second = false;
- bool ellipsis = amount > 0 && size > 4;
+ bool ellipsis = amount > 0 && size > 8;
size_t count = ellipsis ? std::min(size, amount) : size;
for (size_t i = 0; i < count; i++)
@@ -103,8 +103,8 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
if (fbvect == nullptr)
return os;
- bool ellipsis = (fbvect->size() > 4);
- auto limit_size = ellipsis ? 4 : fbvect->size();
+ bool ellipsis = (fbvect->size() > 8);
+ auto limit_size = ellipsis ? 8 : fbvect->size();
if (ellipsis)
{
diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp
deleted file mode 100644
index d2f6e06f1..000000000
--- a/compiler/tfldump/src/Load.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <tflread/Model.h>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-namespace
-{
-
-class MemoryMappedModel final : public tflread::Model
-{
-public:
- /**
- * @require fd and data SHOULD be valid
- */
- explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- ~MemoryMappedModel()
- {
- munmap(_data, _size);
- close(_fd);
- }
-
-public:
- MemoryMappedModel(const MemoryMappedModel &) = delete;
- MemoryMappedModel(MemoryMappedModel &&) = delete;
-
-public:
- const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); }
-
-private:
- int _fd = -1;
- void *_data = nullptr;
- size_t _size = 0;
-};
-
-class FileDescriptor final
-{
-public:
- FileDescriptor(int value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- // NOTE Copy is not allowed
- FileDescriptor(const FileDescriptor &) = delete;
-
-public:
- // NOTE Move is allowed
- FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
-
-public:
- ~FileDescriptor()
- {
- if (_value != -1)
- {
- // Close on destructor
- close(_value);
- }
- }
-
-public:
- int value(void) const { return _value; }
-
-public:
- int release(void)
- {
- auto res = _value;
- _value = -1;
- return res;
- }
-
-private:
- int _value = -1;
-};
-
-} // namespace
-
-namespace tflread
-{
-
-std::unique_ptr<Model> load_tflite(const std::string &path)
-{
- FileDescriptor fd = open(path.c_str(), O_RDONLY);
-
- if (fd.value() == -1)
- {
- // Return nullptr on open failure
- return nullptr;
- }
-
- struct stat st;
- if (fstat(fd.value(), &st) == -1)
- {
- // Return nullptr on fstat failure
- return nullptr;
- }
-
- auto size = st.st_size;
- auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
-
- if (data == MAP_FAILED)
- {
- // Return nullptr on mmap failure
- return nullptr;
- }
-
- return std::unique_ptr<tflread::Model>{new MemoryMappedModel(fd.release(), data, size)};
-}
-
-} // namespace tflread
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
index 47edcb086..2e8e7134f 100644
--- a/compiler/tfldump/src/OpPrinter.cpp
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -736,6 +736,7 @@ OpPrinterRegistry::OpPrinterRegistry()
// There is no Option for CEIL
_op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ // There is no Option for DENSIFY
_op_map[tflite::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>();
_op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
diff --git a/compiler/tflite2circle-conversion-test/CMakeLists.txt b/compiler/tflite2circle-conversion-test/CMakeLists.txt
index 83fe23a8f..2e67d48bd 100644
--- a/compiler/tflite2circle-conversion-test/CMakeLists.txt
+++ b/compiler/tflite2circle-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
index fb8c211b6..6afe1b0f2 100644
--- a/compiler/tflite2circle/driver/Driver.cpp
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -36,24 +36,11 @@ int entry(int argc, char **argv)
{
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
-
- arser.add_argument("tflite")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
- arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ arser.add_argument("tflite").help("Source tflite file path to convert");
+ arser.add_argument("circle").help("Target circle file path");
try
{
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
index 88a4f71df..8149197f6 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -31,8 +31,10 @@
#include "BuildBuiltinOptions/ConcatenationOptions.h"
#include "BuildBuiltinOptions/Conv2DOptions.h"
#include "BuildBuiltinOptions/CosOptions.h"
+#include "BuildBuiltinOptions/DensifyOptions.h"
#include "BuildBuiltinOptions/DepthToSpaceOptions.h"
#include "BuildBuiltinOptions/DepthwiseConv2DOptions.h"
+#include "BuildBuiltinOptions/DequantizeOptions.h"
#include "BuildBuiltinOptions/DivOptions.h"
#include "BuildBuiltinOptions/EqualOptions.h"
#include "BuildBuiltinOptions/ExpandDimsOptions.h"
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp
new file mode 100644
index 000000000..4e5863576
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DensifyOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DensifyOptions>
+build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *)
+{
+ circle::DensifyOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h
new file mode 100644
index 000000000..b6126c4e2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DENSIFY_OPTIONS_H__
+#define __BBO_DENSIFY_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DensifyOptions>
+build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DENSIFY_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp
new file mode 100644
index 000000000..eeacece6a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DequantizeOptions.h"
+#include "DataLookup.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DequantizeOptions>
+build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::DequantizeOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h
new file mode 100644
index 000000000..1cb9f9c1a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DEQUANTIZE_OPTIONS_H__
+#define __BBO_DEQUANTIZE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DequantizeOptions>
+build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DEQUANTIZE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
index d2d2888f2..db88d3e82 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
@@ -25,8 +25,6 @@ namespace tflite2circle
flatbuffers::Offset<circle::MaximumMinimumOptions>
build_circle_MaximumMinimumOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
{
- auto tflite_builtin_options = op->builtin_options_as_MaximumMinimumOptions();
- assert(tflite_builtin_options);
circle::MaximumMinimumOptionsBuilder builtin_options_builder{fb};
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index d483b288f..ac017b8f1 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -344,8 +344,13 @@ template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_fla
circle::OperatorCodeBuilder operator_code_builder{*_fb};
auto de_code = it->deprecated_builtin_code();
auto bt_code = it->builtin_code();
- operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code));
- operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
+ auto cir_de_code = get_circle_builtin_code(de_code);
+ auto cir_bt_code = get_circle_builtin_code(bt_code);
+ // correct bt_code where bt_code == 0 for old tflite format
+ if (cir_bt_code == 0)
+ cir_bt_code = static_cast<circle::BuiltinOperator>(cir_de_code);
+ operator_code_builder.add_deprecated_builtin_code(cir_de_code);
+ operator_code_builder.add_builtin_code(cir_bt_code);
operator_code_builder.add_custom_code(custom_code);
operator_code_builder.add_version(it->version());
auto code = operator_code_builder.Finish();
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
index d55ba464a..9cbf8032a 100644
--- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -42,7 +42,7 @@ TFL_BUILTIN_OPTIONS(TopKV2Options)
TFL_BUILTIN_OPTIONS(SplitOptions)
TFL_BUILTIN_OPTIONS(LogSoftmaxOptions)
TFL_BUILTIN_OPTIONS(CastOptions)
-//TFL_BUILTIN_OPTIONS(DequantizeOptions)
+TFL_BUILTIN_OPTIONS(DequantizeOptions)
TFL_BUILTIN_OPTIONS(MaximumMinimumOptions)
TFL_BUILTIN_OPTIONS(ArgMaxOptions)
TFL_BUILTIN_OPTIONS(LessOptions)
@@ -106,3 +106,4 @@ TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
TFL_BUILTIN_OPTIONS(SegmentSumOptions)
TFL_BUILTIN_OPTIONS(BatchMatMulOptions)
+TFL_BUILTIN_OPTIONS(DensifyOptions)
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index 3841a1b78..93c33cdbd 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000140001)
+ set(VCONONE_VERSION 0x0000000000150001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/compiler/vconone/src/version.cpp b/compiler/vconone/src/version.cpp
index d94a7ada6..cebf7d998 100644
--- a/compiler/vconone/src/version.cpp
+++ b/compiler/vconone/src/version.cpp
@@ -54,7 +54,7 @@ std::string get_string(void)
std::string get_copyright(void)
{
std::string str;
- str = "Copyright (c) 2020-2021 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str = "Copyright (c) 2020-2022 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
str += "Licensed under the Apache License, Version 2.0\r\n";
str += "https://github.com/Samsung/ONE";
return str;
diff --git a/compute/ARMComputeEx/CMakeLists.txt b/compute/ARMComputeEx/CMakeLists.txt
index 58f558db2..c8d12c249 100644
--- a/compute/ARMComputeEx/CMakeLists.txt
+++ b/compute/ARMComputeEx/CMakeLists.txt
@@ -14,7 +14,7 @@ file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp")
# generate embeded cl_kernel
execute_process (
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
- COMMAND bash -c "python resolve_includes.py"
+ COMMAND bash -c "python3 resolve_includes.py"
)
add_library(arm_compute_ex SHARED ${ACL_EX_SRCS})
diff --git a/compute/cker/CMakeLists.txt b/compute/cker/CMakeLists.txt
index 09f67259c..9b3cd4f36 100644
--- a/compute/cker/CMakeLists.txt
+++ b/compute/cker/CMakeLists.txt
@@ -17,3 +17,20 @@ target_include_directories(nnfw_lib_cker INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/i
# Workaround to avoid warning
# TODO Resolve warning
target_compile_options(nnfw_lib_cker INTERFACE -Wno-attributes)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+set(TEST_CKER test_cker)
+
+file(GLOB_RECURSE TESTS "src/*.test.cc")
+
+add_executable(${TEST_CKER} ${TESTS})
+
+target_link_libraries(${TEST_CKER} nnfw_lib_cker)
+target_link_libraries(${TEST_CKER} nnfw_coverage)
+target_link_libraries(${TEST_CKER} gtest gtest_main ${LIB_PTHREAD})
+
+add_test(${TEST_CKER} ${TEST_CKER})
+install(TARGETS ${TEST_CKER} DESTINATION unittest_standalone)
diff --git a/compute/cker/include/cker/CpuBackendThreadpool.h b/compute/cker/include/cker/CpuBackendThreadpool.h
index cc6a9dbfc..8ec6140bd 100644
--- a/compute/cker/include/cker/CpuBackendThreadpool.h
+++ b/compute/cker/include/cker/CpuBackendThreadpool.h
@@ -21,6 +21,8 @@
#include <ruy/context.h> // from @ruy
#include <ruy/thread_pool.h> // from @ruy
+#include <stdexcept>
+
namespace nnfw
{
namespace cker
@@ -33,7 +35,12 @@ using Task = ruy::Task;
template <typename TaskType>
void Execute(int tasks_count, TaskType *tasks, ruy::Context *ruy_context)
{
+ assert(ruy_context != nullptr);
assert(tasks_count <= ruy_context->max_num_threads());
+ if (ruy_context == nullptr)
+ {
+ throw std::runtime_error("CpuBackendThreadpool.h: ruy::Context is null");
+ }
ruy_context->mutable_thread_pool()->Execute(tasks_count, tasks);
}
diff --git a/compute/cker/include/cker/NeonTensorUtils.h b/compute/cker/include/cker/NeonTensorUtils.h
index 8bf0bee03..45ad969c3 100644
--- a/compute/cker/include/cker/NeonTensorUtils.h
+++ b/compute/cker/include/cker/NeonTensorUtils.h
@@ -632,7 +632,7 @@ inline void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
ruy_support::MakeRuyMatrix(rhs_params, input, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, scratch, &ruy_dst);
- ruy::BasicSpec<int32_t, int32_t> ruy_mul_params;
+ ruy::MulParams<int32_t, int32_t> ruy_mul_params;
ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h
index 16c937a27..7cd54dcd5 100644
--- a/compute/cker/include/cker/operation/Conv.h
+++ b/compute/cker/include/cker/operation/Conv.h
@@ -57,9 +57,9 @@ class Conv
public:
Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
- void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
- bool &is_replaced_weights, uint32_t dilationWidthFactor,
- uint32_t dilationHeightFactor)
+ void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
+ bool &is_replaced_weights, uint32_t dilationWidthFactor,
+ uint32_t dilationHeightFactor)
{
if (!_prepared)
{
@@ -71,9 +71,9 @@ public:
}
}
- void prepareQuant(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
- uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor,
- uint32_t dilation_height_factor)
+ void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape,
+ const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+ uint32_t dilation_width_factor, uint32_t dilation_height_factor)
{
if (!_prepared)
{
@@ -138,13 +138,25 @@ public:
}
}
+ void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data,
+ const int32_t *filter_zero_point, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+ {
+ reference::Conv<uint8_t, true>(params, _per_channel_output_multiplier.data(),
+ _per_channel_output_shift.data(), input_shape, input_data,
+ filter_shape, filter_data, filter_zero_point, bias_shape,
+ bias_data, output_shape, output_data);
+ }
+
void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
{
- reference::Conv(params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
- input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
- output_shape, output_data);
+ reference::Conv<int8_t, false>(params, _per_channel_output_multiplier.data(),
+ _per_channel_output_shift.data(), input_shape, input_data,
+ filter_shape, filter_data, nullptr /* filter_zero_point */,
+ bias_shape, bias_data, output_shape, output_data);
}
std::vector<int32_t> &per_channel_output_multiplier() { return _per_channel_output_multiplier; }
std::vector<int> &per_channel_output_shift() { return _per_channel_output_shift; }
diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h
index 06ee780bb..ed1f93d44 100644
--- a/compute/cker/include/cker/operation/DepthwiseConv.h
+++ b/compute/cker/include/cker/operation/DepthwiseConv.h
@@ -25,6 +25,7 @@
#include "cker/operation/optimized/DepthwiseConvFloat.h"
#include "cker/operation/optimized/DepthwiseConvUint8.h"
#include "cker/operation/optimized/integer_ops/DepthwiseConvInt8.h"
+#include "cker/operation/reference/integer_ops/DepthwiseConvUInt8.h"
#include "cker/CpuBackendThreadpool.h"
namespace nnfw
diff --git a/compute/cker/include/cker/operation/reference/Conv.h b/compute/cker/include/cker/operation/reference/Conv.h
index 4474754af..8bfd4694e 100644
--- a/compute/cker/include/cker/operation/reference/Conv.h
+++ b/compute/cker/include/cker/operation/reference/Conv.h
@@ -190,10 +190,13 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
}
}
+template <typename T, bool is_asymmetric>
inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
- const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data,
- const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
- const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+ const int32_t *output_shift, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const T *filter_data, const int32_t *filter_zeropoint,
+ const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape,
+ T *output_data)
+
{
UNUSED_RELEASE(bias_shape);
// Get parameters.
@@ -259,26 +262,35 @@ inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
for (int in_channel = 0; in_channel < input_depth; ++in_channel)
{
- int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
- int32_t filter_val =
+ const T input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ const T filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
- // Accumulate with 32 bits accumulator.
- // In the nudging process during model quantization, we force
- // real value of 0.0 be represented by a quantized value. This
- // guarantees that the input_offset is a int8_t, even though
- // it is represented using int32_t. int32_t += int8_t *
- // (int8_t - int8_t) so the highest value we can get from each
- // accumulation is [-127, 127] * ([-128, 127] -
- // [-128, 127]), which is [-32512, 32512]. log2(32512)
- // = 14.98, which means we can accumulate at least 2^16
- // multiplications without overflow. The accumulator is
- // applied to a filter so the accumulation logic will hold as
- // long as the filter size (filter_y * filter_x * in_channel)
- // does not exceed 2^16, which is the case in all the models
- // we have seen so far.
- // TODO(jianlijianli): Add a check to make sure the
- // accumulator depth is smaller than 2^16.
- acc += filter_val * (input_val + input_offset);
+ if (is_asymmetric)
+ {
+ const int32_t filter_offset = -filter_zeropoint[out_channel];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ }
+ else
+ {
+ // Accumulate with 32 bits accumulator.
+ // In the nudging process during model quantization, we force
+ // real value of 0.0 be represented by a quantized value. This
+ // guarantees that the input_offset is a int8_t, even though
+ // it is represented using int32_t. int32_t += int8_t *
+ // (int8_t - int8_t) so the highest value we can get from each
+ // accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ acc += filter_val * (input_val + input_offset);
+ UNUSED_RELEASE(filter_zeropoint);
+ }
}
}
}
@@ -292,8 +304,7 @@ inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
- output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- static_cast<int8_t>(acc);
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast<T>(acc);
}
}
}
diff --git a/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h
new file mode 100644
index 000000000..025e40705
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
+#define __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference_integer_ops
+{
+inline void DepthwiseConvPerChannel(const DepthwiseConvParams &params,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data,
+ const int32_t *filter_zeropoint, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data)
+{
+ // Get parameters.
+ // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t input_offset = params.input_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ // Check dimensions of the tensors.
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ assert(output_activation_min <= output_activation_max);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(bias_shape);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ uint8_t input_val =
+ input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ uint8_t filter_val =
+ filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
+
+ // { for per-channel
+ // NOTE: The following comment is copied from tflite int8 implementation
+ // It may not be 100% true for uint8 per-channel.
+ //
+ // Accumulate with 32 bits accumulator.
+ // In the nudging process during model quantization, we force
+ // real value of 0.0 be represented by a quantized value. This
+ // guarantees that the input_offset is a int8, even though it
+ // is represented using int32_t.
+ // int32 += int8 * (int8 - int8) so the highest value we can
+ // get from each accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ const int32_t filter_offset = -filter_zeropoint[output_channel];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ // } for per-channel
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[output_channel],
+ output_shift[output_channel]);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ // For q8u per-channel, int8_t -> uint8_t
+ output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace reference_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
diff --git a/compute/cker/include/cker/ruy/RuySupport.h b/compute/cker/include/cker/ruy/RuySupport.h
index 62eeaf6bd..14489a804 100644
--- a/compute/cker/include/cker/ruy/RuySupport.h
+++ b/compute/cker/include/cker/ruy/RuySupport.h
@@ -64,23 +64,35 @@ void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
}
}
-template <typename GemmParamsType, typename RuySpecType>
-void MakeRuyMulParams(const GemmParamsType &params, RuySpecType *ruy_mul_params)
+// Integer-quantized case with destination type narrower than int32
+template <typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, DstScalar, quantization_flavor> &params,
+ ruy::MulParams<std::int32_t, DstScalar> *ruy_mul_params)
{
- // This validation has already been performed by the Gemm API entry point,
- // but it doesn't hurt to test specifically this again here, where it's
- // being used.
- ValidateGemmParams(params);
-
- ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
- ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
- ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
- ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ static_assert(sizeof(DstScalar) < sizeof(std::int32_t), "");
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
+ }
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ }
ruy_mul_params->set_bias(params.bias);
ruy_mul_params->set_clamp_min(params.clamp_min);
ruy_mul_params->set_clamp_max(params.clamp_max);
}
+// Raw-integer case with destination type int32.
+template <QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, std::int32_t, quantization_flavor> &params,
+ ruy::MulParams<std::int32_t, std::int32_t> *ruy_mul_params)
+{
+ ruy_mul_params->set_bias(params.bias);
+}
+
} // namespace ruy_support
} // namespace cker
} // namespace nnfw
diff --git a/compute/test/cker/Range.cc b/compute/cker/src/Range.test.cc
index e5fe4801f..e5fe4801f 100644
--- a/compute/test/cker/Range.cc
+++ b/compute/cker/src/Range.test.cc
diff --git a/compute/ruy/include/ruy/RuySupport.h b/compute/ruy/include/ruy/RuySupport.h
index 7086a96c4..2f9ed7457 100644
--- a/compute/ruy/include/ruy/RuySupport.h
+++ b/compute/ruy/include/ruy/RuySupport.h
@@ -64,23 +64,46 @@ void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
}
}
-template <typename GemmParamsType, typename RuySpecType>
-void MakeRuyMulParams(const GemmParamsType &params, RuySpecType *ruy_mul_params)
+// Floating-point case.
+template <typename AccumScalar, typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params,
+ ::ruy::MulParams<AccumScalar, DstScalar> *ruy_mul_params)
{
- // This validation has already been performed by the Gemm API entry point,
- // but it doesn't hurt to test specifically this again here, where it's
- // being used.
- ValidateGemmParams(params);
+ static_assert(quantization_flavor == QuantizationFlavor::kFloatingPoint, "");
+ ruy_mul_params->set_bias(params.bias);
+ ruy_mul_params->set_clamp_min(params.clamp_min);
+ ruy_mul_params->set_clamp_max(params.clamp_max);
+}
- ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
- ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
- ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
- ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+// Integer-quantized case with destination type narrower than int32
+template <typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, DstScalar, quantization_flavor> &params,
+ ::ruy::MulParams<std::int32_t, DstScalar> *ruy_mul_params)
+{
+ static_assert(sizeof(DstScalar) < sizeof(std::int32_t), "");
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
+ }
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ }
ruy_mul_params->set_bias(params.bias);
ruy_mul_params->set_clamp_min(params.clamp_min);
ruy_mul_params->set_clamp_max(params.clamp_max);
}
+// Raw-integer case with destination type int32.
+template <QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, std::int32_t, quantization_flavor> &params,
+ ::ruy::MulParams<std::int32_t, std::int32_t> *ruy_mul_params)
+{
+ ruy_mul_params->set_bias(params.bias);
+}
+
} // namespace ruy_support
} // namespace ruy
} // namespace nnfw
diff --git a/compute/ruy/include/ruy/operation/Conv.h b/compute/ruy/include/ruy/operation/Conv.h
index 2b9c8c390..3f03694bd 100644
--- a/compute/ruy/include/ruy/operation/Conv.h
+++ b/compute/ruy/include/ruy/operation/Conv.h
@@ -169,7 +169,7 @@ private:
ruy_support::MakeRuyMatrix(rhs_params, gemm_input_data, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
- ::ruy::BasicSpec<float, float> ruy_mul_params;
+ ::ruy::MulParams<float, float> ruy_mul_params;
ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
diff --git a/compute/ruy/include/ruy/operation/FullyConnected.h b/compute/ruy/include/ruy/operation/FullyConnected.h
index 59facdb22..1d686b64b 100644
--- a/compute/ruy/include/ruy/operation/FullyConnected.h
+++ b/compute/ruy/include/ruy/operation/FullyConnected.h
@@ -68,7 +68,7 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
ruy_support::MakeRuyMatrix(rhs_params, input_data, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
- ::ruy::BasicSpec<float, float> ruy_mul_params;
+ ::ruy::MulParams<float, float> ruy_mul_params;
ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
diff --git a/compute/test/CMakeLists.txt b/compute/test/CMakeLists.txt
deleted file mode 100644
index 92aac3e72..000000000
--- a/compute/test/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-set(TEST_COMPUTE test_compute)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_COMPUTE} ${TESTS})
-
-target_link_libraries(${TEST_COMPUTE} nnfw_lib_cker)
-target_link_libraries(${TEST_COMPUTE} gtest)
-target_link_libraries(${TEST_COMPUTE} gtest_main)
-target_link_libraries(${TEST_COMPUTE} ${LIB_PTHREAD} dl)
-add_test(${TEST_COMPUTE} ${TEST_COMPUTE})
-
-install(TARGETS ${TEST_COMPUTE} DESTINATION unittest_standalone)
diff --git a/docs/conf.py b/docs/conf.py
index 84197e6d6..409e5f79b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.20.0'
+release = '1.21.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
index 1f8c0c289..57b2b787c 100644
--- a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
+++ b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
@@ -174,34 +174,26 @@ $ vi j2/etc/systemd/system/ip.service
and set as like:
```
[Service]
-Type=simple
Restart=always
RestartSec=1
User=root
-ExecStart=/bin/sh /bin/ip.sh
+ExecStart=/bin/sh -c "ifconfig eth0 192.168.x.y netmask 255.255.255.0 up"
[Install]
WantedBy=multi-user.target
```
+Replace 192.168.x.y with your actual IP address.
-(5-3) Add a new file
-```
-$ vi j2/bin/ip.sh
-```
-and set with IP address for your RPi4:
-```
-ifconfig eth0 192.168.x.y netmask 255.255.255.0 up
-```
-where you should update `192.168.x.y` part to your actual IP address.
-(5-4) Add a symbolic link
+(5-3) Add a symbolic link
```
+$ sudo mkdir -p j2/etc/systemd/system/multi-user.target.wants/
$ pushd j2/etc/systemd/system/multi-user.target.wants/
$ sudo ln -s ../../system/ip.service .
$ popd
```
-(5-5) Now that every thing is ready, unmount and unplug your memory card and plug into
+(5-4) Now that everything is ready, unmount and unplug your memory card and plug into
RPi4, turn on the power.
```
$ sync
diff --git a/docs/release/1.20/index.rst b/docs/release/1.20/index.rst
new file mode 100644
index 000000000..082d867f3
--- /dev/null
+++ b/docs/release/1.20/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Tue Apr 26 10:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.20
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.20.0.md
diff --git a/docs/release/1.20/release-note-1.20.0.md b/docs/release/1.20/release-note-1.20.0.md
new file mode 100644
index 000000000..2c75e06af
--- /dev/null
+++ b/docs/release/1.20/release-note-1.20.0.md
@@ -0,0 +1,34 @@
+# Release Note 1.20.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- luci-interpreter supports multiple kernels with PAL layer including Cortex-M
+- luci-interpreter supports integer tensors for some kernels
+- luci import supports constants without copying to reduce memory for luci-interpreter
+- Reduce duplicate code to package released modules
+- Limited support for ONNX LSTM/RNN unrolling while importing
+- Limited support for ARM32 cross build
+- Support new operator: SVDF
+- New virtual CircleVariable to support tensor with variable
+- Support quantization of BatchMatMul Op
+- Support mixed(UINT8 + INT16) quantization
+- Support backward propagation of quantization parameters
+- Upgrade default python to version 3.8
+- Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0
+- Upgrade circle schema to follow tflite schema v3b
+- Refactor to mio-tflite280, mio-circle04 with version and helpers methods
+- Use one flatbuffers 2.0 version
+- Drop support for TensorFlow 1.x
+- Fix for several bugs, performance enhancements, and typos
+
+## ONE Runtime
+
+### Introduce TRIX backend
+- TRIX backend supports trix binary with NHWC layout
+- TRIX backend supports trix binary with input/output of Q8 and Q16 type
+
+### API supports new data type
+- Symmetric Quantized int16 type named "NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED"
+
diff --git a/docs/release/1.21/index.rst b/docs/release/1.21/index.rst
new file mode 100644
index 000000000..587065f56
--- /dev/null
+++ b/docs/release/1.21/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Sep 06 12:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.21
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.21.0.md
diff --git a/docs/release/1.21/release-note-1.21.0.md b/docs/release/1.21/release-note-1.21.0.md
new file mode 100644
index 000000000..49bf074b6
--- /dev/null
+++ b/docs/release/1.21/release-note-1.21.0.md
@@ -0,0 +1,35 @@
+# Release Note 1.21.0
+
+## ONE Compiler
+
+- Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
+- Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter`
+- Introduced `Workflow`(WIP) in `one-cmds`
+- New option `quant_config` in `one-quantize`
+- New option `fake_quantize` in `one-quantize`
+- More Ops supported: Densify
+- More Ops for quantization: ReduceMax
+- More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze
+- More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax
+- New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify`
+- Improved reshape elimination in `convert_nchw_to_nhwc` option.
+- Support fusion of Channel-wise Add + Relu with TConv
+- Support negative axis in ArgMin/Max
+- Show errors for unrecognized options in `one-optimize`
+- Fix shape inference for `StridedSlice`
+- Fix FuseBatchNormWithTConvPass to support TConv with bias
+- Deprecate `--O1` option in `circle2circle`
+- Support gcc-11
+- Support limited Float16 for kernels constants with dequantization to Float32
+
+## ONE Runtime
+
+### Basic Multimodel nnpackage
+- Runtime supports running nnpackage with two models
+
+### Channel Wise Quantization on Conv2D and Depthwise Conv2D
+- Conv2D and Depthwise Conv2D support per-channel quantization of uint8 type.
+
+### Batch Execution with TRIX backend
+- TRIX backend supports batch execution which runs in parallel on multiple cores
+
diff --git a/infra/cmake/modules/IdentifyPlatform.cmake b/infra/cmake/modules/IdentifyPlatform.cmake
index 6616283fb..890055fae 100644
--- a/infra/cmake/modules/IdentifyPlatform.cmake
+++ b/infra/cmake/modules/IdentifyPlatform.cmake
@@ -35,6 +35,8 @@ endif()
if("${HOST_ARCH}" STREQUAL "x86_64")
set(HOST_ARCH_BASE ${HOST_ARCH})
+elseif("${HOST_ARCH}" STREQUAL "armv7em")
+ set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "armv7l")
set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "armv7hl")
@@ -49,6 +51,8 @@ endif()
if("${TARGET_ARCH}" STREQUAL "x86_64")
set(TARGET_ARCH_BASE ${TARGET_ARCH})
+elseif("${TARGET_ARCH}" STREQUAL "armv7em")
+ set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7l")
set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7hl")
diff --git a/infra/cmake/packages/AbseilConfig.cmake b/infra/cmake/packages/AbseilConfig.cmake
index 6fae7211d..b3cb364e1 100644
--- a/infra/cmake/packages/AbseilConfig.cmake
+++ b/infra/cmake/packages/AbseilConfig.cmake
@@ -12,11 +12,18 @@ function(_Abseil_import)
# NOTE Turn off abseil testing
set(BUILD_TESTING OFF)
+ # Set -fPIC property because Abseil-cpp can be used for shared library
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ # Abseil-cpp 20211102.0 show warning without below setting
+ set(ABSL_PROPAGATE_CXX_STD ON)
+
add_extdirectory("${AbseilSource_DIR}" ABSEIL)
add_library(abseil INTERFACE)
+
target_link_libraries(abseil INTERFACE
# From "Available Abseil CMake Public Targets" in CMake/README.md
+ # Add absl::status (It is not listed in CMake/README.md)
absl::algorithm
absl::base
absl::debugging
@@ -27,19 +34,14 @@ function(_Abseil_import)
absl::numeric
absl::random_random
absl::strings
- absl::status
absl::synchronization
absl::time
absl::utility
+ absl::status
)
endif(NOT TARGET abseil)
set(Abseil_FOUND TRUE PARENT_SCOPE)
endfunction(_Abseil_import)
-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fPIC")
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fPIC")
-set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fPIC")
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC")
-
_Abseil_import()
diff --git a/infra/cmake/packages/AbseilSourceConfig.cmake b/infra/cmake/packages/AbseilSourceConfig.cmake
index 8aeb86db3..0297c08bc 100644
--- a/infra/cmake/packages/AbseilSourceConfig.cmake
+++ b/infra/cmake/packages/AbseilSourceConfig.cmake
@@ -7,14 +7,13 @@ function(_AbseilSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- # NOTE TensorFlow 2.3 downloads abseil from the following URL
+ # NOTE TensorFlow 2.9 downloads abseil 20211102.0
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz)
-
+ envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20211102.0.tar.gz)
ExternalSource_Download(ABSEIL
DIRNAME ABSEIL
URL ${ABSEIL_URL}
- CHECKSUM MD5=4d9aa7e757adf48fef171c85f0d88552)
+ CHECKSUM MD5=bdca561519192543378b7cade101ec43)
set(AbseilSource_DIR ${ABSEIL_SOURCE_DIR} PARENT_SCOPE)
set(AbseilSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
index 99118c5d9..d1588d3fd 100644
--- a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
@@ -2,7 +2,8 @@ function(_CMSISSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CMSIS_5_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
set(CMSIS_5_8_0_SHA256 fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad)
ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}
diff --git a/infra/cmake/packages/CaffeSourceConfig.cmake b/infra/cmake/packages/CaffeSourceConfig.cmake
index 41cc2c9f7..05eb5b30e 100644
--- a/infra/cmake/packages/CaffeSourceConfig.cmake
+++ b/infra/cmake/packages/CaffeSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_CaffeSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(CAFFE_URL https://github.com/BVLC/caffe/archive/1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CAFFE_URL ${EXTERNAL_DOWNLOAD_SERVER}/BVLC/caffe/archive/1.0.tar.gz)
ExternalSource_Download(CAFFE ${CAFFE_URL})
diff --git a/infra/cmake/packages/CpuInfoSourceConfig.cmake b/infra/cmake/packages/CpuInfoSourceConfig.cmake
index 60419ad9f..b93a6a2e5 100644
--- a/infra/cmake/packages/CpuInfoSourceConfig.cmake
+++ b/infra/cmake/packages/CpuInfoSourceConfig.cmake
@@ -8,8 +8,8 @@ function(_CpuInfoSource_import)
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- # CPUINFO commit including patch from tflite v2.3
- envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/63b254577ed77a8004a9be6ac707f3dccc4e1fd9.tar.gz)
+ # CPUINFO commit from tflite v2.8
+ envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.tar.gz)
ExternalSource_Download(CPUINFO
DIRNAME CPUINFO
URL ${CPUINFO_URL})
diff --git a/infra/cmake/packages/Egl_HeadersSourceConfig.cmake b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..fae57f6ce
--- /dev/null
+++ b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Egl_HeadersSource_import)
+ if(NOT DOWNLOAD_EGL_HEADERS)
+ set(Egl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EGL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(EGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/EGL-Registry/archive/649981109e263b737e7735933c90626c29a306f2.zip)
+
+ ExternalSource_Download(EGL_HEADERS
+ DIRNAME EGL_HEADERS
+ URL ${EGL_HEADERS_URL})
+
+ set(Egl_HeadersSource_DIR ${EGL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Egl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Egl_HeadersSource_import)
+
+_Egl_HeadersSource_import()
diff --git a/infra/cmake/packages/FarmhashSourceConfig.cmake b/infra/cmake/packages/FarmhashSourceConfig.cmake
index a19c8b992..fa1867c5c 100644
--- a/infra/cmake/packages/FarmhashSourceConfig.cmake
+++ b/infra/cmake/packages/FarmhashSourceConfig.cmake
@@ -10,7 +10,8 @@ function(_FarmhashSource_import)
# NOTE TensorFlow 1.12 downloads farmhash from the following URL
# TensorFlow 1.13.1 downloads farmhash from the following URL
# TensorFlow 2.3.0 downloads farmhash from the following URL
- envoption(FARMHASH_1_12_URL https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(FARMHASH_1_12_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
ExternalSource_Download(FARMHASH ${FARMHASH_1_12_URL})
diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
index a0a32aa9e..e094055b7 100644
--- a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
+++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_FlatBuffersSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(FLATBUFFERS_2_0_URL https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(FLATBUFFERS_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/flatbuffers/archive/v2.0.0.tar.gz)
ExternalSource_Download(FLATBUFFERS
DIRNAME FLATBUFFERS-2.0
CHECKSUM MD5=a27992324c3cbf86dd888268a23d17bd
diff --git a/infra/cmake/packages/Fp16SourceConfig.cmake b/infra/cmake/packages/Fp16SourceConfig.cmake
index 3623fd210..3df4e4cc5 100644
--- a/infra/cmake/packages/Fp16SourceConfig.cmake
+++ b/infra/cmake/packages/Fp16SourceConfig.cmake
@@ -9,7 +9,7 @@ function(_Fp16Source_import)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# fp16 commit in xnnpack 8b283aa30a31
- envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/3c54eacb74f6f5e39077300c5564156c424d77ba.tar.gz)
+ envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.tar.gz)
ExternalSource_Download(FP16
DIRNAME FP16
URL ${FP16_URL})
diff --git a/infra/cmake/packages/GEMMLowpSourceConfig.cmake b/infra/cmake/packages/GEMMLowpSourceConfig.cmake
index 6e1cfa9c9..3b3560359 100644
--- a/infra/cmake/packages/GEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/GEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_GEMMLowpSource_import)
# NOTE TensorFlow 1.12 uses the following URL
# TensorFlow 1.13.1 uses the following URL
- envoption(GEMMLOWP_URL https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
ExternalSource_Download(GEMMLOWP ${GEMMLOWP_URL})
diff --git a/infra/cmake/packages/GFlagsSourceConfig.cmake b/infra/cmake/packages/GFlagsSourceConfig.cmake
index 3e70d89fc..2f9b7537f 100644
--- a/infra/cmake/packages/GFlagsSourceConfig.cmake
+++ b/infra/cmake/packages/GFlagsSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_GFlagsSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GFLAGS_URL https://github.com/gflags/gflags/archive/v2.2.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GFLAGS_URL ${EXTERNAL_DOWNLOAD_SERVER}/gflags/gflags/archive/v2.2.1.tar.gz)
ExternalSource_Download(GFLAGS ${GFLAGS_URL})
diff --git a/infra/cmake/packages/GTestSourceConfig.cmake b/infra/cmake/packages/GTestSourceConfig.cmake
index e57d0965a..643c3d109 100644
--- a/infra/cmake/packages/GTestSourceConfig.cmake
+++ b/infra/cmake/packages/GTestSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_GTestSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.11.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GTEST_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/googletest/archive/release-1.11.0.tar.gz)
ExternalSource_Download(GTEST ${GTEST_URL})
diff --git a/infra/cmake/packages/HDF5SourceConfig.cmake b/infra/cmake/packages/HDF5SourceConfig.cmake
index 9db048c86..3440dbd20 100644
--- a/infra/cmake/packages/HDF5SourceConfig.cmake
+++ b/infra/cmake/packages/HDF5SourceConfig.cmake
@@ -7,7 +7,8 @@ function(_HDF5Source_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(HDF5_URL https://github.com/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(HDF5_URL ${EXTERNAL_DOWNLOAD_SERVER}/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
ExternalSource_Download(HDF5 ${HDF5_URL}
PATCH ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch)
diff --git a/infra/cmake/packages/JsoncppSourceConfig.cmake b/infra/cmake/packages/JsoncppSourceConfig.cmake
index 3195ea479..8d672854b 100644
--- a/infra/cmake/packages/JsoncppSourceConfig.cmake
+++ b/infra/cmake/packages/JsoncppSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_JsoncppSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(JSONCPP_URL https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(JSONCPP_URL ${EXTERNAL_DOWNLOAD_SERVER}/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
ExternalSource_Download(JSONCPP ${JSONCPP_URL})
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
index 805554538..e55647da8 100644
--- a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
@@ -2,7 +2,8 @@ function(_MbedOSSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(MBEDOS_6_15_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e)
ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}
diff --git a/infra/cmake/packages/NEON2SSESourceConfig.cmake b/infra/cmake/packages/NEON2SSESourceConfig.cmake
index bd40267a5..82c71e2a8 100644
--- a/infra/cmake/packages/NEON2SSESourceConfig.cmake
+++ b/infra/cmake/packages/NEON2SSESourceConfig.cmake
@@ -8,10 +8,10 @@ function(_NEON2SSESource_import)
nnas_include(OptionTools)
# NOTE TensorFlow 1.13.1 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.1 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.2 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.3 downloads NEON2SSE from the following URL
- envoption(NEON2SSE_URL https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz)
+ # NOTE TensorFlow 2.8.0 downloads NEON2SSE from the following URL
+ # NOTE commit c12f8932c3be5aebaf35562d699f645686c4e2c3 will resolve build fail on debug build
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(NEON2SSE_URL ${EXTERNAL_DOWNLOAD_SERVER}/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz)
ExternalSource_Download(NEON2SSE ${NEON2SSE_URL})
diff --git a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
index c9fb5e490..fe21f6d3d 100644
--- a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
+++ b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ONNXSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(ONNX_1_4_1_URL https://github.com/onnx/onnx/archive/v1.4.1.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(ONNX_1_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.4.1.zip)
ExternalSource_Download(ONNX DIRNAME ONNX-1.4.1
CHECKSUM MD5=604b43a22fbc758f32ae9f3a4fb9d397
diff --git a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
index ef903f834..b2ad08b90 100644
--- a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
+++ b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ONNXSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(ONNX_1_6_0_URL https://github.com/onnx/onnx/archive/v1.6.0.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(ONNX_1_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.6.0.zip)
ExternalSource_Download(ONNX DIRNAME ONNX-1.6.0
CHECKSUM MD5=cbdc547a527f1b59c7f066c8d258b966
diff --git a/infra/cmake/packages/OouraFFTSourceConfig.cmake b/infra/cmake/packages/OouraFFTSourceConfig.cmake
index be551fbe4..d84b5b20f 100644
--- a/infra/cmake/packages/OouraFFTSourceConfig.cmake
+++ b/infra/cmake/packages/OouraFFTSourceConfig.cmake
@@ -8,7 +8,8 @@ function(_OouraFFTSource_import)
nnas_include(OptionTools)
# NOTE TensorFlow 2.3 downloads OOURAFFT from the following URL
- envoption(OOURAFFT_URL https://github.com/petewarden/OouraFFT/archive/v1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OOURAFFT_URL ${EXTERNAL_DOWNLOAD_SERVER}/petewarden/OouraFFT/archive/v1.0.tar.gz)
ExternalSource_Download(OOURAFFT ${OOURAFFT_URL})
diff --git a/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..c5a774a73
--- /dev/null
+++ b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Opengl_HeadersSource_import)
+ if(NOT DOWNLOAD_OPENGL_HEADERS)
+ set(Opengl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OPENGL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OPENGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/OpenGL-Registry/archive/0cb0880d91581d34f96899c86fc1bf35627b4b81.zip)
+
+ ExternalSource_Download(OPENGL_HEADERS
+ DIRNAME OPENGL_HEADERS
+ URL ${OPENGL_HEADERS_URL})
+
+ set(Opengl_HeadersSource_DIR ${OPENGL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Opengl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Opengl_HeadersSource_import)
+
+_Opengl_HeadersSource_import()
diff --git a/infra/cmake/packages/ProtobufSourceConfig.cmake b/infra/cmake/packages/ProtobufSourceConfig.cmake
index baa49eeb0..a1704e53d 100644
--- a/infra/cmake/packages/ProtobufSourceConfig.cmake
+++ b/infra/cmake/packages/ProtobufSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ProtobufSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PROTOBUF_URL https://github.com/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PROTOBUF_URL ${EXTERNAL_DOWNLOAD_SERVER}/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
ExternalSource_Download(PROTOBUF ${PROTOBUF_URL}
PATCH ${CMAKE_CURRENT_LIST_DIR}/ProtobufSource.patch)
diff --git a/infra/cmake/packages/Pybind11SourceConfig.cmake b/infra/cmake/packages/Pybind11SourceConfig.cmake
index 76f51e4d3..2f6425355 100644
--- a/infra/cmake/packages/Pybind11SourceConfig.cmake
+++ b/infra/cmake/packages/Pybind11SourceConfig.cmake
@@ -7,7 +7,8 @@ function(_Pybind11Source_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PYBIND11_URL https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PYBIND11_URL ${EXTERNAL_DOWNLOAD_SERVER}/pybind/pybind11/archive/v2.5.0.tar.gz)
ExternalSource_Download(PYBIND11 ${PYBIND11_URL})
diff --git a/infra/cmake/packages/PytorchSourceConfig.cmake b/infra/cmake/packages/PytorchSourceConfig.cmake
index 0212f2f4b..94757f865 100644
--- a/infra/cmake/packages/PytorchSourceConfig.cmake
+++ b/infra/cmake/packages/PytorchSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_PytorchSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PYTORCH_URL https://github.com/pytorch/pytorch/archive/v0.4.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PYTORCH_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/pytorch/archive/v0.4.1.tar.gz)
ExternalSource_Download(PYTORCH ${PYTORCH_URL})
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
index f84675596..8120ebca2 100644
--- a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowEigenSource_import)
# Exact version used by TensorFlow v2.1.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_1_0_EIGEN_URL https://gitlab.com/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_1_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz)
ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.1.0-EIGEN ${TENSORFLOW_2_1_0_EIGEN_URL})
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake
new file mode 100644
index 000000000..6f59f0771
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/eigen3/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_8_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/008ff3483a8c5604639e1c4d204eae30ad737af6/eigen-e1dd31ce174c3d26fbe38388f64b09d2adbd7557a59e90e6f545a288cc1755fc.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.8.0-EIGEN ${TENSORFLOW_2_8_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
index 035264fa9..421be6c66 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.1.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.1.0-GEMMLOWP ${TENSORFLOW_2_1_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
index bc13d6227..44c56a6be 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.3.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
index b7f3148e8..76cdfdd6c 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.6.0.
# See tensorflow/third_party/gemmlowp/workspace.bzl.
- envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
index f3663cc78..3e17490c3 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.8.0.
# See tensorflow/third_party/gemmlowp/workspace.bzl.
- envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.8.0-GEMMLOWP ${TENSORFLOW_2_8_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
index f1debe775..369816a5e 100644
--- a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
@@ -13,7 +13,7 @@ function(_TensorFlowGpuSource_Import)
set(PATCH_DONE "TRUE")
endif()
endif()
-
+
if(${PATCH_DONE} STREQUAL "TRUE")
message(STATUS "Skip downloading TensorFlowGpuSource")
set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE)
@@ -28,7 +28,8 @@ function(_TensorFlowGpuSource_Import)
# Download TFLite Source Code
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_4_1_URL https://github.com/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL})
# Patch for non used codes on onert backend/gpu_cl
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
index 3dbf05ece..3a7dc893c 100644
--- a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import)
# Exact version used by TensorFlow v2.3.0.
# See tensorflow/third_party/ruy/workspace.bzl
- envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL})
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
index b4dee914f..e4dd4f2bf 100644
--- a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import)
# Exact version used by TensorFlow v2.6.0.
# See tensorflow/third_party/ruy/workspace.bzl
- envoption(TENSORFLOW_2_6_0_RUY_URL https://github.com/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL})
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake
new file mode 100644
index 000000000..2ead7cd51
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.8.0-RUY ${TENSORFLOW_2_8_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
index bcdf9f28c..33538c234 100644
--- a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_1_14_URL https://github.com/tensorflow/tensorflow/archive/v1.14.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_1_14_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v1.14.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-1.14 ${TENSORFLOW_1_14_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
index 0d2a95056..aabc22f72 100644
--- a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_1_0_URL https://github.com/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_1_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.1.0 ${TENSORFLOW_2_1_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
index 71220d743..7dabf88c8 100644
--- a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_2_0_URL https://github.com/tensorflow/tensorflow/archive/v2.2.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.2.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.2.0 ${TENSORFLOW_2_2_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
index 82df579a1..967d49e87 100644
--- a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
@@ -10,7 +10,8 @@ function(_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_3_0_RC0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_RC0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0-RC0 ${TENSORFLOW_2_3_0_RC0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
index 5c3a0f8cc..0ad0cda0b 100644
--- a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_3_0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0 ${TENSORFLOW_2_3_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
index 611c7c805..9a7af17b1 100644
--- a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_6_0_URL https://github.com/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
index 4abe2eae6..988a0f49f 100644
--- a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_8_0_URL https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.8.0 ${TENSORFLOW_2_8_0_URL})
diff --git a/infra/cmake/packages/VulkanSourceConfig.cmake b/infra/cmake/packages/VulkanSourceConfig.cmake
new file mode 100644
index 000000000..76b69898e
--- /dev/null
+++ b/infra/cmake/packages/VulkanSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_VulkanSource_import)
+ if(NOT ${DOWNLOAD_VULKAN})
+ set(VulkanSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_VULKAN})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(VULKAN_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/Vulkan-Headers/archive/ec2db85225ab410bc6829251bef6c578aaed5868.tar.gz)
+ ExternalSource_Download(VULKAN
+ DIRNAME VULKAN
+ URL ${VULKAN_URL})
+
+ set(VulkanSource_DIR ${VULKAN_SOURCE_DIR} PARENT_SCOPE)
+ set(VulkanSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_VulkanSource_import)
+
+_VulkanSource_import()
diff --git a/infra/command/format b/infra/command/format
index 5cf9606fa..993a6ad5a 100644
--- a/infra/command/format
+++ b/infra/command/format
@@ -154,11 +154,9 @@ function check_python_files() {
fi
# Check python files
- FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`
+ FILES_TO_CHECK_PYTHON=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`)
# Exceptional case: one-cmds don't have '.py' extension: ignore non-python source (cmake, etc) and ignore shell script: one-prepare-venv
- FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK_PYTHON" | egrep -v '^compiler/one-cmds/.*\..*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`
- # Transform to array
- FILES_TO_CHECK_PYTHON=($FILES_TO_CHECK_PYTHON)
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/one-cmds/[^(\./)]*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`)
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
skip=${s#'.'/}/
diff --git a/infra/command/gen-coverage-report b/infra/command/gen-coverage-report
index 3058aee9a..df6377d2a 100644
--- a/infra/command/gen-coverage-report
+++ b/infra/command/gen-coverage-report
@@ -69,10 +69,10 @@ done
opencl_files=($(find ./runtime/onert/backend/gpu_cl/open_cl/ \( -name "*.cc" -o -name "*.h" \) -exec realpath {} \; ))
-# Exclude *.test.cpp files from coverage report
+# Exclude test files from coverage report
# Exclude flatbuffer generated files from coverage report
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*.test.cpp' '*_schema_generated.h' "${opencl_files[@]}"
+ '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h' "${opencl_files[@]}"
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog
index 2763ac55b..ddca70a17 100644
--- a/infra/debian/compiler/changelog
+++ b/infra/debian/compiler/changelog
@@ -1,3 +1,50 @@
+one (1.21.0) bionic; urgency=medium
+
+ * Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
+ * Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter`
+ * Introduced `Workflow`(WIP) in `one-cmds`
+ * New option `quant_config` in `one-quantize`
+ * New option `fake_quantize` in `one-quantize`
+ * More Ops supported: Densify
+ * More Ops for quantization: ReduceMax
+ * More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze
+ * More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax
+ * New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify`
+ * Improved reshape elimination in `convert_nchw_to_nhwc` option.
+ * Support fusion of Channel-wise Add + Relu with TConv
+ * Support negative axis in ArgMin/Max
+ * Show errors for unrecognized options in `one-optimize`
+ * Fix shape inference for `StridedSlice`
+ * Fix FuseBatchNormWithTConvPass to support TConv with bias
+ * Deprecate `--O1` option in `circle2circle`
+ * Support gcc-11
+ * Support limited Float16 for kernels constants with dequantization to Float32
+
+ -- seongwoo <mhs4670go@naver.com> Wed, 06 Sep 2022 12:00:00 +0900
+
+one (1.20.0) bionic; urgency=medium
+
+ * luci-interpreter supports multiple kernels with PAL layer including Cortext-M
+ * luci-interpreter supports integer tensor for partly kernels
+ * luci import support constant without coping to reduce memory for luci-interpreter
+ * Reduce duplicate codes to package released modules
+ * Limited support for ONNX LSTM/RNN unrolling while importing
+ * Limited support for ARM32 cross build
+ * Support new operator: SVDF
+ * New virtual CircleVariable to support tensor with variable
+ * Support quantization of BatchMatMul Op
+ * Support mixed(UINT8 + INT16) quantization
+ * Support backward propagation of quantization parameters
+ * Upgrade default python to version 3.8
+ * Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0
+ * Upgrade circle schema to follow tflite schema v3b
+ * Refactor to mio-tflite280, mio-circle04 with version and helpers methods
+ * Use one flatbuffers 2.0 version
+ * Drop support for TensorFlow 1.x
+ * Fix for several bugs, performance enhancements, and typos
+
+ -- seongwoo <mhs4670go@naver.com> Tue, 26 Apr 2022 12:00:00 +0900
+
one (1.19.0) bionic; urgency=medium
* `circle-quantizer` supports input/output type option
diff --git a/infra/debian/compiler/docs/one-infer.1 b/infra/debian/compiler/docs/one-infer.1
new file mode 100644
index 000000000..a1bafbb12
--- /dev/null
+++ b/infra/debian/compiler/docs/one-infer.1
@@ -0,0 +1,46 @@
+.TH ONE-INFER "1" "July 2022" "one-infer version 1.21.0" "User Commands"
+.SH NAME
+one-infer \- manual page for one-infer version 1.21.0
+.SH DESCRIPTION
+usage: one\-infer [\-h] [\-v] [\-C CONFIG] [\-d DRIVER | \fB\-b\fR BACKEND] [\-\-post\-process POST_PROCESS] [\-\-] [COMMANDS FOR BACKEND DRIVER]
+.PP
+command line tool to infer model
+.SS "optional arguments:"
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuation file
+.TP
+\fB\-d\fR DRIVER, \fB\-\-driver\fR DRIVER
+backend inference driver name to execute
+.TP
+\fB\-b\fR BACKEND, \fB\-\-backend\fR BACKEND
+backend name to use
+.TP
+\fB\-\-post\-process\fR POST_PROCESS
+post processing script to convert I/O data to standard
+format
+.SH COPYRIGHT
+Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-infer
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-infer
+programs are properly installed at your site, the command
+.IP
+.B info one-infer
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-partition.1 b/infra/debian/compiler/docs/one-partition.1
new file mode 100644
index 000000000..5b6fe933d
--- /dev/null
+++ b/infra/debian/compiler/docs/one-partition.1
@@ -0,0 +1,56 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
+.TH ONE-PARTITION "1" "June 2022" "one-partition version 1.21.0" "User Commands"
+.SH NAME
+one-partition \- manual page for one-partition version 1.21.0
+.SH DESCRIPTION
+usage: one\-partition [\-h] [\-v] [\-V] [\-C CONFIG] [\-\-backends BACKENDS]
+.TP
+[\-\-default DEFAULT] [\-\-part_file PART_FILE]
+[\-\-input_file INPUT_FILE] [\-\-work_path WORK_PATH]
+.PP
+command line tool to partition circle model by multiple backends
+.SS "optional arguments:"
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuation file
+.TP
+\fB\-\-backends\fR BACKENDS
+backends in CSV to use for partitioning
+.TP
+\fB\-\-default\fR DEFAULT
+default backend to assign
+.TP
+\fB\-\-part_file\fR PART_FILE
+partition file which provides backend to assign
+.TP
+\fB\-\-input_file\fR INPUT_FILE
+input circle model filename
+.TP
+\fB\-\-work_path\fR WORK_PATH
+work path of partition, input files exist and output
+files are produced
+.SH COPYRIGHT
+Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-partition
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-partition
+programs are properly installed at your site, the command
+.IP
+.B info one-partition
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install
index 805ba8677..65e46d188 100644
--- a/infra/debian/compiler/one-compiler.install
+++ b/infra/debian/compiler/one-compiler.install
@@ -1,6 +1,8 @@
# {FILES_TO_INSTALL} {DEST_DIR}
# bin
usr/bin/circle2circle usr/share/one/bin/
+usr/bin/circle-eval-diff usr/share/one/bin/
+usr/bin/circle-operator usr/share/one/bin/
usr/bin/circle-partitioner usr/share/one/bin/
usr/bin/circle-quantizer usr/share/one/bin/
usr/bin/generate_bcq_metadata.py usr/share/one/bin/
@@ -16,14 +18,21 @@ usr/bin/one-import-bcq usr/share/one/bin/
usr/bin/one-import-onnx usr/share/one/bin/
usr/bin/one-import-tf usr/share/one/bin/
usr/bin/one-import-tflite usr/share/one/bin/
+usr/bin/one-infer usr/share/one/bin/
usr/bin/one-optimize usr/share/one/bin/
usr/bin/one-pack usr/share/one/bin/
+usr/bin/one-partition usr/share/one/bin/
usr/bin/one-prepare-venv usr/share/one/bin/
usr/bin/one-profile usr/share/one/bin/
usr/bin/one-quantize usr/share/one/bin/
usr/bin/one-version usr/share/one/bin/
usr/bin/onelib/constant.py usr/share/one/bin/onelib/
usr/bin/onelib/make_cmd.py usr/share/one/bin/onelib/
+usr/bin/onelib/CfgRunner.py usr/share/one/bin/onelib/
+usr/bin/onelib/OptionBuilder.py usr/share/one/bin/onelib/
+usr/bin/onelib/TopologicalSortHelper.py usr/share/one/bin/onelib/
+usr/bin/onelib/WorkflowRunner.py usr/share/one/bin/onelib/
+usr/bin/onnx_legalizer.py usr/share/one/bin/
usr/bin/rawdata2hdf5 usr/share/one/bin/
usr/bin/record-minmax usr/share/one/bin/
usr/bin/tf2nnpkg usr/share/one/bin/
diff --git a/infra/debian/compiler/one-compiler.manpages b/infra/debian/compiler/one-compiler.manpages
index 77f2f4e46..e0284ae4e 100644
--- a/infra/debian/compiler/one-compiler.manpages
+++ b/infra/debian/compiler/one-compiler.manpages
@@ -1,5 +1,6 @@
debian/docs/one-build.1
debian/docs/one-codegen.1
+debian/docs/one-infer.1
debian/docs/one-import.1
debian/docs/one-import-bcq.1
debian/docs/one-import-onnx.1
@@ -7,6 +8,7 @@ debian/docs/one-import-tf.1
debian/docs/one-import-tflite.1
debian/docs/one-optimize.1
debian/docs/one-pack.1
+debian/docs/one-partition.1
debian/docs/one-profile.1
debian/docs/one-quantize.1
debian/docs/onecc.1
diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog
index 4cf0abc30..e07c50c21 100644
--- a/infra/debian/runtime/changelog
+++ b/infra/debian/runtime/changelog
@@ -1,3 +1,18 @@
+one (1.21.0) bionic; urgency=low
+
+ * Runtime supports to run nnpackage with two models
+ * Conv2D and Depthwise Conv2D supports per-channel quantization of uint8 type.
+ * TRIX backend supports batch execution which run in parallel with multicore
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Tue, 06 Sep 2022 12:00:00 +0900
+
+one (1.20.0) bionic; urgency=low
+
+ * Introduce TRIX backend
+ * API supports new data type NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Wed, 26 Apr 2022 12:00:00 +0900
+
one (1.19.0) bionic; urgency=low
* Synch up version with ONE Compiler
diff --git a/infra/debian/runtime/rules b/infra/debian/runtime/rules
index dee87a9ed..97170ee09 100755
--- a/infra/debian/runtime/rules
+++ b/infra/debian/runtime/rules
@@ -3,7 +3,7 @@ DEBVER := $(shell dpkg-parsechangelog -SVersion)
export DH_VERBOSE = 1
export _DESTDIR = debian/tmp/
export BUILD_TYPE=release
-export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_3_0=0 -DBUILD_TENSORFLOW_LITE=0
+export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
export DEBIAN_BUILD=1
export INSTALL_PATH=debian/tmp/usr/
%:
diff --git a/infra/docker/bionic/Dockerfile b/infra/docker/bionic/Dockerfile
index dbc22a6e8..f7ffc73fd 100644
--- a/infra/docker/bionic/Dockerfile
+++ b/infra/docker/bionic/Dockerfile
@@ -86,7 +86,7 @@ RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubunt
RUN apt-get update && apt-get -qqy install gbs
RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
# Install java
RUN apt-get install -y --no-install-recommends openjdk-8-jdk
diff --git a/infra/docker/focal/Dockerfile b/infra/docker/focal/Dockerfile
index 6f3cd9b60..1cdeffbb7 100644
--- a/infra/docker/focal/Dockerfile
+++ b/infra/docker/focal/Dockerfile
@@ -46,7 +46,7 @@ RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubunt
RUN apt-get update && apt-get -qqy install gbs
RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_4.2.19_ubuntu-64.zip -O sdb.zip
RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
# Clean archives (to reduce image size)
RUN apt-get clean -y
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index 2ff5a5f6a..768d7972b 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -1,4 +1,7 @@
-cmake_minimum_required(VERSION 3.1)
+# The libboost 1.74 uses IN_LIST operator, which requires the policy CMP0057, in a CMake file.
+# This policy requires ``cmake_minimum_required(VERSION 3.3)``.
+# Run "cmake --help-policy CMP0057" for policy details.
+cmake_minimum_required(VERSION 3.3)
project(nncc)
diff --git a/infra/nncc/cmake/options/options_armv7em-generic.cmake b/infra/nncc/cmake/options/options_armv7em-generic.cmake
new file mode 100644
index 000000000..d671b73f1
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7em-generic.cmake
@@ -0,0 +1,3 @@
+#
+# armv7em generic cmake options
+#
diff --git a/infra/nnfw/CMakeLists.txt b/infra/nnfw/CMakeLists.txt
index 897a16fbf..2a27eee59 100644
--- a/infra/nnfw/CMakeLists.txt
+++ b/infra/nnfw/CMakeLists.txt
@@ -55,6 +55,12 @@ macro(nnas_find_package PREFIX)
)
endmacro(nnas_find_package)
+# C++14 feature requires 5 or later
+# Using std::unordered_map shows build fail under 6.2
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2)
+ message(FATAL "Runtime build requires GNU Compiler version 6.2 or later.")
+endif()
+
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS OFF)
diff --git a/infra/nnfw/cmake/ApplyCompileFlags.cmake b/infra/nnfw/cmake/ApplyCompileFlags.cmake
index b042b0c42..b1c7ff568 100644
--- a/infra/nnfw/cmake/ApplyCompileFlags.cmake
+++ b/infra/nnfw/cmake/ApplyCompileFlags.cmake
@@ -31,3 +31,13 @@ endforeach()
foreach(FLAG ${FLAGS_CXXONLY})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
endforeach()
+
+# lib pthread as a variable (finding pthread build option must be disabled on android)
+# Define here to use on external lib build
+set(LIB_PTHREAD lib_pthread)
+add_library(${LIB_PTHREAD} INTERFACE)
+if(NOT TARGET_OS STREQUAL "android")
+ # Get compile option (ex. "-pthread" on linux GNU build tool)
+ find_package(Threads)
+ target_link_libraries(${LIB_PTHREAD} INTERFACE Threads::Threads)
+endif()
diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake
index 5371120ad..440f1859a 100644
--- a/infra/nnfw/cmake/CfgOptionFlags.cmake
+++ b/infra/nnfw/cmake/CfgOptionFlags.cmake
@@ -31,6 +31,8 @@ option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
+option(BUILD_NPUD "Build NPU daemon" ON)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" ON)
#
# Default build configuration for contrib
#
@@ -72,9 +74,10 @@ option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE_2_8_0 "Build TensorFlow Lite 2.8.0 from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
+option(DEBUG_ARMCOMPUTE "Build ARM Compute as debug type" OFF)
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON)
option(PROFILE_RUY "Enable ruy library profiling" OFF)
diff --git a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
index e0c81dee7..fb63b3c47 100644
--- a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
@@ -1,8 +1,5 @@
include("cmake/buildtool/config/config_linux.cmake")
-# On Android, pthread is contained in bionic(libc)
-set(LIB_PTHREAD "")
-
# SIMD for aarch64
set(FLAGS_COMMON ${FLAGS_COMMON}
"-ftree-vectorize"
diff --git a/infra/nnfw/cmake/buildtool/config/config_linux.cmake b/infra/nnfw/cmake/buildtool/config/config_linux.cmake
index 86dd0f217..01b47ef4a 100644
--- a/infra/nnfw/cmake/buildtool/config/config_linux.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_linux.cmake
@@ -2,20 +2,11 @@
# linux common compile options
#
-# remove warning from arm cl
+# Remove warning: ignoring attributes on template argument (ACL, Eigen, etc)
# https://github.com/ARM-software/ComputeLibrary/issues/330
-set(GCC_VERSION_DISABLE_WARNING 6.0)
-if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER GCC_VERSION_DISABLE_WARNING)
- message(STATUS "GCC version higher than ${GCC_VERSION_DISABLE_WARNING}")
- set(FLAGS_CXXONLY ${FLAGS_CXXONLY}
- "-Wno-ignored-attributes"
- )
-endif()
+set(FLAGS_CXXONLY ${FLAGS_CXXONLY} "-Wno-ignored-attributes")
# Disable annoying ABI compatibility warning.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
list(APPEND FLAGS_CXXONLY "-Wno-psabi")
endif()
-
-# lib pthread as a variable (pthread must be disabled on android)
-set(LIB_PTHREAD pthread)
diff --git a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
index dbd45fc03..52d6c6b2b 100644
--- a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
@@ -7,6 +7,3 @@ message(STATUS "Building for x86-64 Darwin")
set(FLAGS_COMMON ${FLAGS_COMMON}
"-msse4"
)
-
-# lib pthread as a variable (pthread must be disabled on android)
-set(LIB_PTHREAD pthread)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
index 3356aa72d..07b26a937 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
@@ -21,12 +21,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
index 4d5d7ac56..cab7325dd 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
@@ -23,12 +23,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
index 8f2cb6735..c69259f85 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
@@ -21,12 +21,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
index 72513cdc1..181415df2 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
@@ -23,12 +23,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
@@ -46,10 +40,6 @@ add_compile_options(-mfpu=neon-vfpv4)
add_compile_options(-mfloat-abi=softfp)
add_compile_options(--sysroot=${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}")
-
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}")
-
include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/)
include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/armv7l-tizen-linux-gnueabi)
add_compile_options(-Wno-deprecated-declarations) # compile-time option
diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake
index 9332f5299..e95ccca63 100644
--- a/infra/nnfw/cmake/options/options_aarch64-android.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake
@@ -10,3 +10,5 @@ option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(DOWNLOAD_BOOST "Download boost source" ON)
option(BUILD_BOOST "Build boost source" ON)
option(BUILD_LOGGING "Build logging runtime" OFF)
+# Do not support npud
+option(BUILD_NPUD "Build NPU daemon" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
index eab3b0a92..9b487d93c 100644
--- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
@@ -9,6 +9,7 @@ option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
diff --git a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
index 31b7fd6fb..eea37224d 100644
--- a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
@@ -2,6 +2,7 @@
# x86_64 linux cmake options
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
index 6ae7dea34..f6a4efd96 100644
--- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
+++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
@@ -90,11 +90,11 @@ function(_ARMCompute_Build ARMComputeInstall_DIR)
return()
endif(NOT SCONS_PATH)
- if(CMAKE_BUILD_TYPE)
- string(TOLOWER "${CMAKE_BUILD_TYPE}" SCON_BUILD_TYPE)
- else(CMAKE_BUILD_TYPE)
+ if(DEBUG_ARMCOMPUTE)
+ set(SCON_BUILD_TYPE "debug")
+ else(DEBUG_ARMCOMPUTE)
set(SCON_BUILD_TYPE "release")
- endif(CMAKE_BUILD_TYPE)
+ endif(DEBUG_ARMCOMPUTE)
#### Architecture-specific configurations
diff --git a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
index 878026d9a..dddec8988 100644
--- a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
+++ b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
@@ -16,14 +16,18 @@ function(_CpuInfo_Build)
nnas_include(ExternalProjectTools)
- set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "Build command-line tools")
- set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "Build cpuinfo unit tests")
- set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "Build cpuinfo mock tests")
- set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "Build cpuinfo micro-benchmarks")
+ # Set build option
+ # - Static (position independent)
+ # - No logging
+ # - Library only (CPUINFO_RUNTIME_TYPE is not used)
+ set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
+ set(CPUINFO_LOG_LEVEL "none" CACHE STRING "")
+ set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "")
add_extdirectory("${CpuInfoSource_DIR}" cpuinfo EXCLUDE_FROM_ALL)
set_target_properties(cpuinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
- # Suppress warnings generated by clog
- set_target_properties(clog PROPERTIES COMPILE_FLAGS "-Wno-unused-result")
set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE)
set(CpuInfo_FOUND TRUE PARENT_SCOPE)
endfunction(_CpuInfo_Build)
diff --git a/infra/nnfw/cmake/packages/GLib2.0Config.cmake b/infra/nnfw/cmake/packages/GLib2.0Config.cmake
new file mode 100644
index 000000000..d4c6bf241
--- /dev/null
+++ b/infra/nnfw/cmake/packages/GLib2.0Config.cmake
@@ -0,0 +1,41 @@
+function(_GLIB_2_0_import)
+ find_library(GLIB_LIBRARIES
+ NAMES glib-2.0)
+
+ get_filename_component(GLIB_LIBRARY_DIR ${GLIB_LIBRARIES} DIRECTORY)
+ find_path(GLIBCONFIG_INCLUDE_DIR
+ NAMES glibconfig.h
+ PATHS ${GLIB_LIBRARY_DIR}
+ PATH_SUFFIXES glib-2.0/include
+ NO_CMAKE_FIND_ROOT_PATH)
+
+ find_path(GLIB_INCLUDE_DIR
+ NAMES glib.h
+ PATH_SUFFIXES glib-2.0)
+
+ set(GLIB_FOUND TRUE)
+
+ if(NOT GLIB_LIBRARIES)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIB_LIBRARIES)
+
+ if(NOT GLIBCONFIG_INCLUDE_DIR)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIBCONFIG_INCLUDE_DIR)
+
+ if(NOT GLIB_INCLUDE_DIR)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIB_INCLUDE_DIR)
+
+ set(GLIB_INCLUDE_DIRS ${GLIB_INCLUDE_DIR} ${GLIBCONFIG_INCLUDE_DIR})
+
+ if(NOT GLIB_FOUND)
+ message(STATUS "Failed to find GLib 2.0")
+ endif(NOT GLIB_FOUND)
+
+ set(GLIB2.0_FOUND ${GLIB_FOUND} PARENT_SCOPE)
+ set(GLIB2.0_INCLUDE_DIRS ${GLIB_INCLUDE_DIRS} PARENT_SCOPE)
+ set(GLIB2.0_LIBRARIES ${GLIB_LIBRARIES} PARENT_SCOPE)
+endfunction(_GLIB_2_0_import)
+
+_GLIB_2_0_import()
diff --git a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
index 9140a17a7..a1c4656e3 100644
--- a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
+++ b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(RUY_BASE ${RuySource_DIR}/ruy)
+set(RUY_BASE ${TensorFlowRuySource_DIR}/ruy)
#
# Ruy library
@@ -14,7 +14,6 @@ list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/example_advanced.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/tune_tool.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/pmu.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/create_trmul_params.cc")
-list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/prepare_packed_matrices.cc")
list(APPEND RUY_INSTRUMENTATION_SRCS "${RUY_BASE}/profiler/instrumentation.cc")
@@ -23,7 +22,7 @@ if(PROFILE_RUY)
list(APPEND RUY_PROFILER_SRCS "${RUY_BASE}/profiler/treeview.cc")
endif(PROFILE_RUY)
-list(APPEND RUY_INCLUDES "${RuySource_DIR}")
+list(APPEND RUY_INCLUDES "${TensorFlowRuySource_DIR}")
add_library(ruy STATIC ${RUY_SRCS})
target_include_directories(ruy SYSTEM PUBLIC ${RUY_INCLUDES})
diff --git a/infra/nnfw/cmake/packages/RuyConfig.cmake b/infra/nnfw/cmake/packages/RuyConfig.cmake
index 4e7cc24ac..6f5f4b71e 100644
--- a/infra/nnfw/cmake/packages/RuyConfig.cmake
+++ b/infra/nnfw/cmake/packages/RuyConfig.cmake
@@ -5,14 +5,14 @@ function(_Ruy_Build)
return()
endif(TARGET ruy)
- nnas_find_package(RuySource QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.8 QUIET)
nnfw_find_package(CpuInfo QUIET)
- if(NOT RuySource_FOUND)
+ if(NOT TensorFlowRuySource_FOUND)
message(STATUS "RUY: Source not found")
set(Ruy_FOUND FALSE PARENT_SCOPE)
return()
- endif(NOT RuySource_FOUND)
+ endif(NOT TensorFlowRuySource_FOUND)
if (NOT CpuInfo_FOUND)
message(STATUS "RUY: CPUINFO not found")
@@ -20,6 +20,17 @@ function(_Ruy_Build)
return()
endif(NOT CpuInfo_FOUND)
+ # Ruy's cmake requires cmake >= 3.14
+ # Once cmake >= 3.14 is available, enable the commented-out code below
+ #if(PROFILE_RUY)
+ # # Will be used on ruy build
+ # set(RUY_PROFILER ON)
+ #endif(PROFILE_RUY)
+ #add_extdirectory("${RuySource_DIR}" Ruy)
+ #
+ ## Ignore warning from ruy
+ #target_compile_options(ruy INTERFACE -Wno-comment)
+
add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/Ruy" ruy)
set(Ruy_FOUND TRUE PARENT_SCOPE)
endfunction(_Ruy_Build)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
index 9a7b240e9..f872b88cd 100644
--- a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
@@ -52,6 +52,12 @@ target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FA
set_property(TARGET tensorflow-lite PROPERTY POSITION_INDEPENDENT_CODE ON)
target_link_libraries(tensorflow-lite eigen-tf-1.13.1 flatbuffers::flatbuffers ${LIB_PTHREAD} dl)
+# Define TF_LITE_DISABLE_X86_NEON for debug build
+# If we upgrade NEON2SSE version, we can remove below line
+if(NEON2SSESource_FOUND)
+ target_compile_definitions(tensorflow-lite PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
+endif(NEON2SSESource_FOUND)
+
if(ANDROID)
target_link_libraries(tensorflow-lite log)
target_include_directories(tensorflow-lite PUBLIC "${NDK_DIR}/..")
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt
deleted file mode 100644
index afee6e1cc..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-#
-# Tensorflow Lite library 2.3.0
-#
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
- "${TENSORFLOW_LITE_BASE}/*.cc"
- "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
- "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
-endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
-endif()
-
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
-endif()
-
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
- "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
- "${TENSORFLOW_LITE_BASE}/*example*.cc"
- "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# include headers
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
-
-if(NEON2SSESource_FOUND)
- list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
-endif(NEON2SSESource_FOUND)
-
-add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR})
-target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
-set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.3.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.3.0 rt)
-endif()
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite-2.3.0 log)
- target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake
deleted file mode 100644
index c81958cf4..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake
+++ /dev/null
@@ -1,44 +0,0 @@
-if(BUILD_TENSORFLOW_LITE_2_3_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("TFLiteVanillaRun: ${VAR} NOT TRUE")
- set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
- return_unless(TensorFlowSource_FOUND)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
- nnas_find_package(AbseilSource QUIET)
- return_unless(AbseilSource_FOUND)
- nnfw_find_package(Eigen QUIET)
- return_unless(Eigen_FOUND)
- nnas_find_package(Farmhash QUIET)
- return_unless(Farmhash_FOUND)
- nnfw_find_package(FlatBuffers QUIET)
- return_unless(FlatBuffers_FOUND)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
- return_unless(TensorFlowGEMMLowpSource_FOUND)
- nnas_find_package(OouraFFTSource QUIET)
- return_unless(OouraFFTSource_FOUND)
- nnfw_find_package(Ruy QUIET)
- return_unless(Ruy_FOUND)
-
- # TensorFlow Lite requires FP16 library's header only
- nnas_find_package(Fp16Source QUIET)
- return_unless(Fp16Source_FOUND)
-
- # Optional packages
- nnas_find_package(NEON2SSESource QUIET)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.3.0)
-
- set(TensorFlowLite_2_3_0_FOUND TRUE)
- return()
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake
deleted file mode 100644
index 08e637421..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "2.3.0")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
new file mode 100644
index 000000000..d7e1d0666
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
@@ -0,0 +1,121 @@
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+#
+# Tensorflow Lite library 2.8.0
+#
+set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
+
+file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
+ "${TENSORFLOW_LITE_BASE}/*.cc"
+ "${TENSORFLOW_LITE_BASE}/core/*.cc")
+
+file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
+
+file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
+
+file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
+ "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
+
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/platform_profiler.cc")
+
+file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
+
+file(GLOB TFLITE_SCHEMA_UTIL_SRCS "${TENSORFLOW_LITE_BASE}/schema/*.cc")
+
+# Moved to kernels/internal/utils
+#file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
+
+list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
+#list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_SCHEMA_UTIL_SRCS})
+
+# externals
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
+
+# Build with mmap? true
+# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
+set(BUILD_WITH_MMAP TRUE)
+if(${BUILD_WITH_MMAP})
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
+else()
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
+endif()
+
+# Build with nnapi? true
+# caution: this nnapi delegate comes from tflite, not ours.
+set(BUILD_WITH_NNAPI TRUE)
+if(${BUILD_WITH_NNAPI})
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/utils.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/serialization.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
+else()
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
+endif()
+
+# ios: we don't support ios
+list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
+
+# android
+if(NOT ANDROID)
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
+endif()
+
+# exclude some source files
+file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
+ "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
+ "${TENSORFLOW_LITE_BASE}/*example*.cc"
+ "${TENSORFLOW_LITE_BASE}/*tool*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
+
+# exclude some kernels (requires python3-dev package)
+# TODO Enable these kernels by installing package on build system
+file(GLOB_RECURSE TFLITE_KERNEL_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/variable_ops_wrapper.cc"
+ "${TENSORFLOW_LITE_BASE}/kernels/gradient/*.cc"
+ "${TENSORFLOW_LITE_BASE}/kernels/perception/*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_KERNEL_EXCLS})
+
+# exclude kernel shim
+file(GLOB_RECURSE TFLITE_SHIM_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/shim/*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_SHIM_EXCLS})
+
+# include headers
+list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
+#list(APPEND TFLITE_INCLUDES "${Pybind11Source_DIR}/include")
+
+if(NEON2SSESource_FOUND)
+ list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
+endif(NEON2SSESource_FOUND)
+
+add_library(tensorflow-lite-2.8.0 STATIC ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite-2.8.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
+target_include_directories(tensorflow-lite-2.8.0 PRIVATE ${CpuInfoSource_DIR})
+target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
+set_property(TARGET tensorflow-lite-2.8.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.8.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
+if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+ target_link_libraries(tensorflow-lite-2.8.0 rt)
+endif()
+
+# Define TF_LITE_DISABLE_X86_NEON for debug build
+# If we upgrade NEON2SSE version, we can remove below line
+if(NEON2SSESource_FOUND)
+ target_compile_definitions(tensorflow-lite-2.8.0 PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
+endif(NEON2SSESource_FOUND)
+
+if(ANDROID)
+ target_link_libraries(tensorflow-lite-2.8.0 log)
+ target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
+endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
new file mode 100644
index 000000000..1c8061812
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
@@ -0,0 +1,50 @@
+if(BUILD_TENSORFLOW_LITE_2_8_0)
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("TFLite 2.8: ${VAR} NOT TRUE")
+ set(TensorFlowLite_2_8_0_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
+ nnas_find_package(AbseilSource QUIET)
+ return_unless(AbseilSource_FOUND)
+ nnfw_find_package(Eigen QUIET)
+ return_unless(Eigen_FOUND)
+ nnas_find_package(Farmhash QUIET)
+ return_unless(Farmhash_FOUND)
+ nnfw_find_package(FlatBuffers QUIET)
+ return_unless(FlatBuffers_FOUND)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+ nnas_find_package(OouraFFTSource QUIET)
+ return_unless(OouraFFTSource_FOUND)
+ nnfw_find_package(Ruy QUIET)
+ return_unless(Ruy_FOUND)
+
+ # TensorFlow Lite requires FP16 library's header only
+ nnas_find_package(Fp16Source QUIET)
+ return_unless(Fp16Source_FOUND)
+
+ # TensorFlow Lite requires Pybind11 library's header only
+ # But Pybind11 requires python3-dev package
+ # TODO Enable below by installing package on build system
+ #nnas_find_package(Pybind11Source QUIET)
+ #return_unless(Pybind11Source_FOUND)
+
+ # Optional packages
+ nnas_find_package(NEON2SSESource QUIET)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.8.0)
+
+ set(TensorFlowLite_2_8_0_FOUND TRUE)
+ return()
+endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake
new file mode 100644
index 000000000..cd49d7b72
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake
@@ -0,0 +1,9 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/config/gbs.conf b/infra/nnfw/config/gbs.conf
index 1150a5fc8..2b5994a1b 100644
--- a/infra/nnfw/config/gbs.conf
+++ b/infra/nnfw/config/gbs.conf
@@ -3,20 +3,11 @@
profile = profile.tizen
[profile.tizen]
-user=obs_viewer
-obs = obs.tizen
-repos = repo.tizen_one,repo.tizen_base,repo.tizen_mobile
+repos = repo.tizen_base,repo.tizen_mobile
buildroot = /home/GBS-ROOT/
-[obs.tizen]
-url = http://api.tizen.org
-
[repo.tizen_mobile]
url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
[repo.tizen_base]
url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
-
-[repo.tizen_one]
-url = http://13.125.34.93/archive/tizen/
-
diff --git a/infra/packaging/preset/20220323 b/infra/packaging/preset/20220323
index 421106c35..0eac1064f 100644
--- a/infra/packaging/preset/20220323
+++ b/infra/packaging/preset/20220323
@@ -20,21 +20,26 @@ function preset_configure()
# loco IR and related utilities
REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Flatbuffer I/O
- REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
# Data I/O
REQUIRED_UNITS+=("dio-hdf5")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Tools
- REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
REQUIRED_UNITS+=("circle-tensordump" "circledump")
- REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
REQUIRED_UNITS+=("luci-eval-driver")
REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
- REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
# TODO Use "nncc configure" and "nncc build"
diff --git a/infra/packaging/preset/20220323_windows b/infra/packaging/preset/20220323_windows
index 60500b1e0..14917b3dd 100644
--- a/infra/packaging/preset/20220323_windows
+++ b/infra/packaging/preset/20220323_windows
@@ -15,20 +15,26 @@ function preset_configure()
# loco IR and related utilities
REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Flatbuffer I/O
- REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
# Data I/O
REQUIRED_UNITS+=("dio-hdf5")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Tools
- REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
- REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
REQUIRED_UNITS+=("luci-eval-driver")
REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
- REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
NPROC=$(cat /proc/cpuinfo | grep -c processor)
# TODO Use "nncc configure" and "nncc build"
diff --git a/infra/packaging/res/tf2nnpkg.20220323 b/infra/packaging/res/tf2nnpkg.20220323
index 0d44818a1..5f43b2386 100644
--- a/infra/packaging/res/tf2nnpkg.20220323
+++ b/infra/packaging/res/tf2nnpkg.20220323
@@ -104,6 +104,6 @@ fi
${ONE_IMPORT_BCQ_SCRIPT}
# optimize
-"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh
index 6a857d2c8..51cba92f9 100644
--- a/infra/scripts/compiler_modules.sh
+++ b/infra/scripts/compiler_modules.sh
@@ -1,5 +1,8 @@
#!/bin/bash
+# NOTE this file is sourced by other scripts, for the purpose of
+# - configure_compiler_coverage.sh: to get test coverage for release criteria
+
# Don't run this script
[[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}, source it" && return
@@ -8,13 +11,14 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite;mio-tflite260;mio-tflite280"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite280;dio-hdf5"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
DEBUG_BUILD_ITEMS+=";luci-eval-driver;luci-pass-value-test;luci-value-test"
DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer"
-DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver"
+DEBUG_BUILD_ITEMS+=";circle-eval-diff"
+DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver;circle-operator"
DEBUG_BUILD_ITEMS+=";circle-verify"
DEBUG_BUILD_ITEMS+=";circle-tensordump"
DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
@@ -25,3 +29,5 @@ DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
DEBUG_BUILD_ITEMS+=";pota-quantization-value-test"
DEBUG_BUILD_ITEMS+=";circle-part-value-test"
+DEBUG_BUILD_ITEMS+=";circle-quantizer-dredd-recipe-test"
+DEBUG_BUILD_ITEMS+=";circle-operator-test"
diff --git a/infra/scripts/docker_build_nncc.sh b/infra/scripts/docker_build_nncc.sh
index 7146141bb..2e603b550 100755
--- a/infra/scripts/docker_build_nncc.sh
+++ b/infra/scripts/docker_build_nncc.sh
@@ -27,13 +27,13 @@ else
fi
# prepare tensorflow
-if [ -d $TENSORFLOW_PREFIX ]; then
+if [ -n "$TENSORFLOW_PREFIX" ]; then
DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow"
CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow"
fi
# prepare onnx
-if [ -d $ONNXRUNTIME_PREFIX ]; then
+if [ -n "$ONNXRUNTIME_PREFIX" ]; then
DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime"
CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime"
fi
diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh
index 26d8de4a9..b3428e083 100755
--- a/infra/scripts/docker_build_test_x64.sh
+++ b/infra/scripts/docker_build_test_x64.sh
@@ -32,8 +32,8 @@ pushd $ROOT_PATH > /dev/null
export DOCKER_ENV_VARS
export DOCKER_VOLUMES
export BUILD_OPTIONS
-# Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5
-CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF $BUILD_OPTIONS' && \
+
+CMD="export OPTIONS='$BUILD_OPTIONS' && \
export BUILD_TYPE=Release && \
cp -nv Makefile.template Makefile && \
make all install build_test_suite"
diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh
index 06cf8809a..afdd3b9cb 100755
--- a/infra/scripts/docker_collect_nnpkg_resources.sh
+++ b/infra/scripts/docker_collect_nnpkg_resources.sh
@@ -28,13 +28,13 @@ else
fi
# prepare tensorflow
-if [ -d $TENSORFLOW_PREFIX ]; then
+if [ -n "$TENSORFLOW_PREFIX" ]; then
DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow"
CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow"
fi
# prepare onnx
-if [ -d $ONNXRUNTIME_PREFIX ]; then
+if [ -n "$ONNXRUNTIME_PREFIX" ]; then
DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime"
CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime"
fi
@@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
diff --git a/infra/scripts/test_ubuntu_runtime_mixed.sh b/infra/scripts/test_ubuntu_runtime_mixed.sh
index 697fed897..2510d9c85 100755
--- a/infra/scripts/test_ubuntu_runtime_mixed.sh
+++ b/infra/scripts/test_ubuntu_runtime_mixed.sh
@@ -55,8 +55,8 @@ echo "GeneratedTests.squeeze_relaxed" >> $SKIPLIST_PREFIX.union
# Run the test
export OP_BACKEND_Conv2D="cpu"
-export OP_BACKEND_MaxPool2D="acl_cl"
-export OP_BACKEND_AvgPool2D="acl_neon"
+export OP_BACKEND_Pool2D="acl_cl"
+export OP_BACKEND_FullyConnected="acl_neon"
export ACL_LAYOUT="NCHW"
export RUY_THREADS=4
NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
diff --git a/infra/scripts/unittest_compiler_xml.sh b/infra/scripts/unittest_compiler_xml.sh
index 46d3bc813..6e9e8ad7f 100755
--- a/infra/scripts/unittest_compiler_xml.sh
+++ b/infra/scripts/unittest_compiler_xml.sh
@@ -7,7 +7,9 @@ set -eo pipefail
CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_PATH="$CURRENT_PATH/../../"
NNCC_WORKSPACE=${NNCC_WORKSPACE:-${ROOT_PATH}build}
-UNITTEST_REPORT_DIR=${NNCC_WORKSPACE}/unittest_compiler_xml
+
+# Use fixed absolute report dir for CI
+UNITTEST_REPORT_DIR=${ROOT_PATH}build/unittest_compiler_xml
for i in "$@"
do
@@ -25,5 +27,10 @@ fi
for TEST_BIN in `find ${NNCC_WORKSPACE}/compiler -type f -executable -name *_test`; do
TEST_NAME="$(basename -- $TEST_BIN)"
- LUGI_LOG=999 $TEST_BIN --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml"
+ TEST_DIR="$(dirname $TEST_BIN)"
+
+ # Execute on test directory to find related file
+ pushd $TEST_DIR > /dev/null
+ LUGI_LOG=999 ./$TEST_NAME --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml"
+ popd > /dev/null
done
diff --git a/nnpackage/examples/README.md b/nnpackage/examples/README.md
index fb0bae35e..951048bec 100644
--- a/nnpackage/examples/README.md
+++ b/nnpackage/examples/README.md
@@ -1,5 +1,12 @@
# NNPackage example
+## Package version 1.3.0
+
+### two_tflites
+
+- Model file: two TensorFlow Lite models
+- It has two tflite models with pkg-input, pkg-output and model-connect fields.
+
## Package version 1.1.0
### one_op_in_tflite
diff --git a/nnpackage/examples/v1.3.0/two_tflites/README.md b/nnpackage/examples/v1.3.0/two_tflites/README.md
new file mode 100644
index 000000000..3fcbe2d90
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/README.md
@@ -0,0 +1,28 @@
+## How to create
+
+```
+$ wget https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz
+$ tar -zxf mobilenet_v1_1.0_224.tgz
+
+$ python tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite <( echo 0-1 ) mv1.0_1.tflite
+$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 0 ) mv1.0.tflite
+$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 1 ) mv1.1.tflite
+
+# make sure three tflite is valid
+$ ./Product/out/bin/tflite_comparator mv1.0_1.tflite
+$ ./Product/out/bin/tflite_comparator mv1.0.tflite
+$ ./Product/out/bin/tflite_comparator mv1.1.tflite
+
+$ tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -m mv1.0.tflite mv1.1.tflite -p two_tflites
+$ cat two_tflites/metadata/MANIFEST
+{
+ "major-version" : "1",
+ "minor-version" : "2",
+ "patch-version" : "0",
+ "configs" : [ ],
+ "models" : [ "mv1.0.tflite", "mv1.1.tflite" ],
+ "model-types" : [ "tflite", "tflite" ]
+}
+
+# update minor-version, and add additional fields manually
+```
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST
new file mode 100644
index 000000000..9d9e21ac1
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST
@@ -0,0 +1,11 @@
+{
+ "major-version" : "1",
+ "minor-version" : "3",
+ "patch-version" : "0",
+ "configs" : [ ],
+ "models" : [ "mv1.0.tflite", "mv1.1.tflite" ],
+ "model-types" : [ "tflite", "tflite" ],
+ "pkg-inputs" : [ "0:0:0" ],
+ "pkg-outputs" : [ "1:0:0" ],
+ "model-connect" : [ { "from" : "0:0:0", "to" : [ "1:0:0" ] } ]
+}
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5
new file mode 100644
index 000000000..59a6b9040
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5
new file mode 100644
index 000000000..2251157c7
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite
new file mode 100644
index 000000000..03f30c747
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite
new file mode 100644
index 000000000..e3b4f8db7
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite
Binary files differ
diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs
index 3972056f9..8ad444d95 100644
--- a/nnpackage/schema/circle_schema.fbs
+++ b/nnpackage/schema/circle_schema.fbs
@@ -1,4 +1,4 @@
-// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright (c) 2019~2022 Samsung Electronics Co., Ltd. All Rights Reserved
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,7 @@
// `asymmetric_quantize_inputs` for several operator options
// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
namespace circle;
@@ -52,6 +53,14 @@ enum TensorType : byte {
COMPLEX64 = 8,
INT8 = 9,
FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental, that are subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
}
// Custom quantization parameters for experimenting with new quantization
@@ -224,8 +233,11 @@ table Tensor {
// ones, but not by much. Moreover, while custom operators accept an opaque
// object containing configuration parameters, builtins have a predetermined
// set of acceptable options.
-
-enum BuiltinOperator : ubyte {
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
ADD = 0,
AVERAGE_POOL_2D = 1,
CONCATENATION = 2,
@@ -258,7 +270,6 @@ enum BuiltinOperator : ubyte {
SPACE_TO_DEPTH = 26,
SVDF = 27,
TANH = 28,
- // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
CONCAT_EMBEDDINGS = 29,
SKIP_GRAM = 30,
CALL = 31,
@@ -360,10 +371,28 @@ enum BuiltinOperator : ubyte {
DENSIFY = 124,
SEGMENT_SUM = 125,
BATCH_MATMUL = 126,
- BCQ_GATHER = 252,
- BCQ_FULLY_CONNECTED = 253,
- INSTANCE_NORM = 254,
-}
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
// Options for the builtin operators.
union BuiltinOptions {
@@ -468,6 +497,19 @@ union BuiltinOptions {
DensifyOptions,
SegmentSumOptions,
BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
BCQGatherOptions = 252,
BCQFullyConnectedOptions = 253,
InstanceNormOptions = 254,
@@ -493,6 +535,18 @@ table Conv2DOptions {
dilation_h_factor:int = 1;
}
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
table Pool2DOptions {
padding:Padding;
stride_w:int;
@@ -599,6 +653,8 @@ table ConcatenationOptions {
table AddOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
}
table MulOptions {
@@ -606,6 +662,7 @@ table MulOptions {
}
table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
fused_activation_function:ActivationFunctionType;
}
@@ -679,6 +736,7 @@ table ResizeBilinearOptions {
table ResizeNearestNeighborOptions {
align_corners: bool;
+ half_pixel_centers: bool;
}
// A call operation options
@@ -719,6 +777,8 @@ table DepthToSpaceOptions {
table SubOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
}
table DivOptions {
@@ -740,6 +800,8 @@ table EmbeddingLookupSparseOptions {
table GatherOptions {
axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
}
table TransposeOptions {
@@ -962,6 +1024,10 @@ table IfOptions {
else_subgraph_index:int;
}
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
table WhileOptions {
cond_subgraph_index:int;
body_subgraph_index:int;
@@ -988,6 +1054,54 @@ table SegmentSumOptions {
table BatchMatMulOptions {
adjoint_lhs:bool;
adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: int;
+ seed2: int;
}
table BCQGatherOptions {
@@ -1008,12 +1122,21 @@ table InstanceNormOptions {
// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
// builtin, or a string if the operator is custom.
table OperatorCode {
- builtin_code:BuiltinOperator;
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field has less than
+ // BulitinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
custom_code:string;
// The version of the operator. The version need to be bumped whenever new
// parameters are introduced into an op.
version:int = 1;
+
+ // This field is introduced for resolving op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // has greater than BulitinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
}
enum CustomOptionsFormat : byte {
@@ -1104,6 +1227,35 @@ table Metadata {
buffer:uint;
}
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of subgraphs that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
table Model {
// Version of the schema.
version:uint;
@@ -1132,6 +1284,9 @@ table Model {
// Metadata about the model.
metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
}
root_type Model;
diff --git a/packaging/ABSEIL.tar.gz b/packaging/ABSEIL.tar.gz
index e05654156..dc7aab548 100644
--- a/packaging/ABSEIL.tar.gz
+++ b/packaging/ABSEIL.tar.gz
Binary files differ
diff --git a/packaging/CPUINFO.tar.gz b/packaging/CPUINFO.tar.gz
index ced5debc3..a74fe355a 100644
--- a/packaging/CPUINFO.tar.gz
+++ b/packaging/CPUINFO.tar.gz
Binary files differ
diff --git a/packaging/FP16.tar.gz b/packaging/FP16.tar.gz
index ebd276435..78c787673 100644
--- a/packaging/FP16.tar.gz
+++ b/packaging/FP16.tar.gz
Binary files differ
diff --git a/packaging/RUY.tar.gz b/packaging/RUY.tar.gz
deleted file mode 100644
index 9ad14fe6c..000000000
--- a/packaging/RUY.tar.gz
+++ /dev/null
Binary files differ
diff --git a/packaging/TENSORFLOW-2.8.0-RUY.tar.gz b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz
new file mode 100644
index 000000000..8e6734718
--- /dev/null
+++ b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz
Binary files differ
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 324fe1d11..0518541da 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,6 +1,6 @@
Name: nnfw
Summary: nnfw
-Version: 1.20.0
+Version: 1.21.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
@@ -12,7 +12,7 @@ Source1001: nnapi_test_generated.tar.gz
#Source1002: GTEST.tar.gz
Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz
Source1004: GEMMLOWP.tar.gz
-Source1005: RUY.tar.gz
+Source1005: TENSORFLOW-2.8.0-RUY.tar.gz
Source1006: CPUINFO.tar.gz
Source1007: XNNPACK.tar.gz
Source1008: FXDIV.tar.gz
@@ -28,21 +28,26 @@ Source2001: nnfw.pc.in
Source2002: nnfw-plugin.pc.in
%{!?build_type: %define build_type Release}
+%{!?npud_build: %define npud_build 1}
%{!?trix_support: %define trix_support 1}
%{!?coverage_build: %define coverage_build 0}
%{!?test_build: %define test_build 0}
%{!?extra_option: %define extra_option %{nil}}
+%{!?config_support: %define config_support 1}
+
%if %{coverage_build} == 1
+# Coverage test requires debug build runtime
+%define build_type Debug
%define test_build 1
%endif
BuildRequires: cmake
# Require flatbuffers-devel for onert frontend (model loading)
-BuildRequires: flatbuffers-devel
+BuildRequires: pkgconfig(flatbuffers)
%ifarch %{arm} aarch64
# Require python for acl-ex library build pre-process
-BuildRequires: python
+BuildRequires: python3
BuildRequires: libarmcl-devel >= v21.02
%endif
@@ -50,17 +55,21 @@ Requires(post): /sbin/ldconfig
Requires(postun): /sbin/ldconfig
%if %{test_build} == 1
-BuildRequires: boost-devel
-BuildRequires: tensorflow-lite-devel
+BuildRequires: pkgconfig(boost)
+BuildRequires: pkgconfig(tensorflow-lite)
BuildRequires: hdf5-devel
BuildRequires: libaec-devel
-BuildRequires: zlib-devel
-BuildRequires: libjpeg-devel
+BuildRequires: pkgconfig(zlib)
+BuildRequires: pkgconfig(libjpeg)
BuildRequires: gtest-devel
%endif
+%if %{npud_build} == 1
+BuildRequires: pkgconfig(glib-2.0)
+%endif
+
%if %{trix_support} == 1
-BuildRequires: npu-engine-devel
+BuildRequires: pkgconfig(npu-engine)
%endif
%description
@@ -91,7 +100,18 @@ Minimal test binary for VD manual test
Summary: NNFW Test
%description test
-NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
+NNFW test rpm.
+If you want to use test package, you should install runtime package which is build with test build option
+If you want to get coverage info, you should install runtime package which is build with coverage build option
+# TODO Use release runtime pacakge for test
+%endif
+
+%if %{npud_build} == 1
+%package npud
+Summary: NPU daemon
+
+%description npud
+NPU daemon for optimal management of NPU hardware
%endif
%ifarch armv7l
@@ -112,22 +132,40 @@ NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
-%define build_env NNFW_WORKSPACE=build
-%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off -DBUILD_MINIMAL_SAMPLE=on
+%define nnfw_workspace build
+%define build_env NNFW_WORKSPACE=%{nnfw_workspace}
-# Set option for test build (and coverage test build)
+# Path to install test bin and scripts (test script assumes path Product/out)
+# TODO Share path with release package
%define test_install_home /opt/usr/nnfw-test
%define test_install_dir %{test_install_home}/Product/out
%define test_install_path %{buildroot}%{test_install_dir}
-%define coverage_option %{nil}
+
+# Set option for test build (and coverage test build)
+%define option_test -DENABLE_TEST=OFF
+%define option_coverage %{nil}
%define test_suite_list infra/scripts tests/scripts
-%define test_build_type %{build_type}
+
+%if %{test_build} == 1
+# ENVVAR_ONERT_CONFIG: Use environment variable for runtime core configuration and debug
+%define option_test -DENABLE_TEST=ON -DENVVAR_ONERT_CONFIG=ON
+%endif # test_build
+
+# Set option for configuration
+%define option_config %{nil}
+%if %{config_support} == 1
+%if %{npud_build} == 1
+# ENVVAR_NPUD_CONFIG: Use environment variable for npud configuration and debug
+%define option_config -DENVVAR_NPUD_CONFIG=ON
+%endif # npud_build
+%endif # config_support
+
%if %{coverage_build} == 1
-%define coverage_option -DENABLE_COVERAGE=ON
-%define test_build_type Debug
-%endif
-%define test_build_env NNFW_INSTALL_PREFIX=%{test_install_path} NNFW_WORKSPACE=build_for_test
-%define test_build_options %{coverage_option} -DCMAKE_BUILD_TYPE=%{test_build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENVVAR_ONERT_CONFIG=ON
+%define option_coverage -DENABLE_COVERAGE=ON
+%endif # coverage_build
+
+%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DBUILD_MINIMAL_SAMPLE=ON \\\
+ %{option_test} %{option_coverage} %{option_config} %{extra_option}
%prep
%setup -q
@@ -153,17 +191,13 @@ tar -xf %{SOURCE1016} -C ./externals
%build
%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
# runtime build
-%{build_env} ./nnfw configure %{build_options} %{extra_option}
+%{build_env} ./nnfw configure %{build_options}
%{build_env} ./nnfw build -j4
# install in workspace
# TODO Set install path
%{build_env} ./nnfw install
%if %{test_build} == 1
-# test runtime
-# TODO remove duplicated build process
-%{test_build_env} ./nnfw configure %{test_build_options} %{extra_option}
-%{test_build_env} ./nnfw build -j4
%if %{coverage_build} == 1
pwd > tests/scripts/build_path.txt
%endif # coverage_build
@@ -195,19 +229,37 @@ install -m 0644 ./nnfw.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw.pc
install -m 0644 ./nnfw-plugin.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw-plugin.pc
%if %{test_build} == 1
-%{test_build_env} ./nnfw install
+mkdir -p %{test_install_path}/bin
+mkdir -p %{test_install_path}/unittest
+mkdir -p %{test_install_path}/unittest_standalone
+mkdir -p %{test_install_path}/test
+
+install -m 755 build/out/bin/nnapi_test %{test_install_path}/bin
+install -m 755 build/out/bin/nnpackage_run %{test_install_path}/bin
+install -m 755 build/out/bin/tflite_comparator %{test_install_path}/bin
+install -m 755 build/out/bin/tflite_run %{test_install_path}/bin
+install -m 755 build/out/unittest/* %{test_install_path}/unittest
+install -m 755 build/out/unittest_standalone/*_test %{test_install_path}/unittest_standalone
+install -m 755 build/out/unittest_standalone/test_* %{test_install_path}/unittest_standalone
+cp -r build/out/test/* %{test_install_path}/test
+cp -r build/out/unittest_standalone/nnfw_api_gtest_models %{test_install_path}/unittest_standalone
+
# Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{buildroot}%{test_install_dir}/unittest/.
-cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/unittest/.
+cp %{test_install_path}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/unittest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
mkdir -p %{buildroot}%{test_install_home}/gcov
-find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
+find %{nnfw_workspace} -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/test/build_path.txt
%endif # coverage_build
%endif # test_build
+%if %{npud_build} == 1
+install -m 755 build/out/bin/npud %{buildroot}%{_bindir}
+%endif
+
%endif
%post -p /sbin/ldconfig
@@ -256,6 +308,15 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%endif # arm armv7l armv7hl aarch64
%endif # test_build
+%if %{npud_build} == 1
+%files npud
+%manifest %{name}.manifest
+%defattr(-,root,root,-)
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
+%{_bindir}/npud
+%endif # arm armv7l armv7hl aarch64 x86_64 %ix86
+%endif # npud_build
+
%changelog
* Thu Mar 15 2018 Chunseok Lee <chunseok.lee@samsung.com>
- Initial spec file for nnfw
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe
new file mode 100644
index 000000000..b9c2ab8c9
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.rule b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule
new file mode 100644
index 000000000..a17692d05
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "GAMMA_S16" $(tensor_dtype gamma) '=' INT16
+RULE "BETA_S16" $(tensor_dtype beta) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe
new file mode 100644
index 000000000..b9c2ab8c9
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.rule b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule
new file mode 100644
index 000000000..e62dd4839
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "GAMMA_U8" $(tensor_dtype gamma) '=' UINT8
+RULE "BETA_U8" $(tensor_dtype beta) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe
new file mode 100644
index 000000000..b31e16043
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "-1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Densify_000/test.recipe b/res/TensorFlowLiteRecipes/Densify_000/test.recipe
new file mode 100644
index 000000000..480c52f15
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Densify_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe
new file mode 100644
index 000000000..572badfbb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "x"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "y"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "x"
+ input: "y"
+ input: ""
+ output: "out"
+}
+input: "x"
+input: "y"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule
new file mode 100644
index 000000000..01518e575
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule
@@ -0,0 +1,7 @@
+# To check if FullyConnected with non-const weight is replaced by MatMul
+# with replace_non_const_fc_with_batch_matmul pass
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BATCH_MATMUL_EXIST" $(op_count BATCH_MATMUL) '=' 1
+RULE "NO_FULLY_CONNECTED" $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe
new file mode 100644
index 000000000..ea604b20f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dense"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe
new file mode 100644
index 000000000..6e1083fae
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "dense32"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse16"
+ output: "dense16"
+}
+operation {
+ type: "Dequantize"
+ input: "dense16"
+ output: "dense32"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dense32"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe
new file mode 100644
index 000000000..5f212a7a6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe
@@ -0,0 +1,41 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "float16"
+ type: FLOAT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "dequantized"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Dequantize"
+ input: "float16"
+ output: "dequantized"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dequantized"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
index b3247f24f..afb9a9c4d 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -52,11 +43,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -67,9 +53,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "input_size"
@@ -84,9 +67,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
index 89a344f0e..b1c9784b0 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition_add_param"
@@ -45,9 +39,6 @@ operand {
arg: "1" arg: "2" arg: "3" arg: "4"
arg: "-1" arg: "-2" arg: "-3" arg: "-4"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -58,11 +49,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -73,9 +59,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "input_size"
@@ -90,9 +73,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
index cfea30653..426551485 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -52,11 +43,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -67,9 +53,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "input_size"
@@ -84,9 +67,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
index 65248f23b..ef329e1ac 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -52,9 +43,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -66,9 +54,6 @@ operand {
tag: "explicit"
arg: "2.00834"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -79,11 +64,6 @@ operand {
dim: 2
dim: 1
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -94,9 +74,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -111,9 +88,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
index babf5af4e..1b329bafc 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -52,9 +43,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -66,9 +54,6 @@ operand {
tag: "explicit"
arg: "2.00834"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -79,11 +64,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -94,9 +74,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -111,9 +88,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
index e40fe4f59..a8af8e497 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
@@ -8,10 +8,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -26,10 +22,6 @@ operand {
arg: "1"
arg: "2"
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3"
@@ -42,10 +34,6 @@ operand {
arg: "-2.04724"
arg: "-7.80109"
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
@@ -61,10 +49,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
@@ -75,10 +59,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -89,9 +69,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -104,9 +81,6 @@ operand {
arg: "2.00834"
arg: "1.00344"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Relu6"
@@ -117,10 +91,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe
new file mode 100644
index 000000000..c28e50880
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe
@@ -0,0 +1,135 @@
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Output"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ }
+}
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operand {
+ name: "conv2d_transpose/bias"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1.03"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Input"
+ input: "conv2d_transpose/bias"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "Output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Output"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe
new file mode 100644
index 000000000..b75527a98
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe
@@ -0,0 +1,149 @@
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "16"
+ }
+}
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose/bias"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Input"
+ input: "conv2d_transpose/bias"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Output"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "Output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Output"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe
new file mode 100644
index 000000000..0ae4862d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule
new file mode 100644
index 000000000..b51f4ebbb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: U8, Add dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "ADD_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe
new file mode 100644
index 000000000..0ae4862d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule
new file mode 100644
index 000000000..96a2535ef
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: S16, Add dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "ADD_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe
new file mode 100644
index 000000000..2f2e91a9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ batch_matmul_options {
+ adj_x: false
+ adj_y: false
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe
new file mode 100644
index 000000000..2f2e91a9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ batch_matmul_options {
+ adj_x: false
+ adj_y: false
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule
new file mode 100644
index 000000000..f7af083da
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule
@@ -0,0 +1,10 @@
+# To check float32 input.
+# Input is float32, Conv is uint8. Quantize Op is inserted at the beginning.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule
new file mode 100644
index 000000000..a3f52f26d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule
@@ -0,0 +1,11 @@
+# To check float32 output.
+# Output is float32, Conv is uint8. Dequantize Op is inserted at the end.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+# Update tensor name (ofm_Dequantize) if 'create_dequantize' function is changed.
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule
new file mode 100644
index 000000000..2187895f8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule
@@ -0,0 +1,13 @@
+# To check float32 input/output.
+# Input/Output is float32, Conv is uint8.
+# Quantize Op is inserted at the beginning, Dequantize Op is inserted at the end.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe
new file mode 100644
index 000000000..148256aa2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe
@@ -0,0 +1,49 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-9" arg: "10" arg: "-11" arg: "12"
+ arg: "5" arg: "6" arg: "7" arg: "8"
+ arg: "13" arg: "-14" arg: "15" arg: "-16"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 2
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ depth_multiplier: 2
+ activation : RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe
new file mode 100644
index 000000000..148256aa2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe
@@ -0,0 +1,49 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-9" arg: "10" arg: "-11" arg: "12"
+ arg: "5" arg: "6" arg: "7" arg: "8"
+ arg: "13" arg: "-14" arg: "15" arg: "-16"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 2
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ depth_multiplier: 2
+ activation : RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json
new file mode 100644
index 000000000..ad2bad697
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe
new file mode 100644
index 000000000..0ecb5618b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule
new file mode 100644
index 000000000..f54256084
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IN_U8" $(tensor_dtype in) '=' UINT8
+RULE "IN_QUANTIZE_S16" $(tensor_dtype in_Quantize) '=' INT16
+RULE "WEIGHT_S16" $(tensor_dtype weight) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype out) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype out_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json
new file mode 100644
index 000000000..ff3eb9791
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe
new file mode 100644
index 000000000..0ecb5618b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule
new file mode 100644
index 000000000..4acd22946
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IN_S16" $(tensor_dtype in) '=' INT16
+RULE "IN_QUANTIZE_U8" $(tensor_dtype in_Quantize) '=' UINT8
+RULE "WEIGHT_U8" $(tensor_dtype weight) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype out) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype out_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe
new file mode 100644
index 000000000..836a37305
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LeakyRelu"
+ leaky_relu_options {
+ alpha: 2.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe
new file mode 100644
index 000000000..836a37305
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LeakyRelu"
+ leaky_relu_options {
+ alpha: 2.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe
new file mode 100644
index 000000000..dca24da4c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe
new file mode 100644
index 000000000..dca24da4c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe
new file mode 100644
index 000000000..447e4a1ab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Neg"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe
new file mode 100644
index 000000000..447e4a1ab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Neg"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule
new file mode 100644
index 000000000..81436146c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "ALPHA_S16" $(tensor_dtype alpha) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule
new file mode 100644
index 000000000..5b9416017
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "ALPHA_U8" $(tensor_dtype alpha) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe
new file mode 100644
index 000000000..3dd4c761c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule
new file mode 100644
index 000000000..3a3429d41
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe
new file mode 100644
index 000000000..3dd4c761c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule
new file mode 100644
index 000000000..2c5fcd5a3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe
new file mode 100644
index 000000000..ef6b964c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 8 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "16" arg: "16" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "ResizeNearestNeighbor"
+ resize_nearest_neighbor_options {
+ align_corners: true
+ }
+ input: "ifm"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe
new file mode 100644
index 000000000..ef6b964c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 8 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "16" arg: "16" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "ResizeNearestNeighbor"
+ resize_nearest_neighbor_options {
+ align_corners: true
+ }
+ input: "ifm"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe
new file mode 100644
index 000000000..2f9ccddfa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe
new file mode 100644
index 000000000..2f9ccddfa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe
new file mode 100644
index 000000000..7bdf87d47
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe
new file mode 100644
index 000000000..7bdf87d47
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe
new file mode 100644
index 000000000..c281b0482
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "3"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe
new file mode 100644
index 000000000..c281b0482
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "3"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe
new file mode 100644
index 000000000..edc8efd9d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe
@@ -0,0 +1,46 @@
+#
+# Failed case from https://github.com/Samsung/ONE/issues/9439
+#
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 32 }
+ is_variable: false
+}
+operand {
+ name: "strided_slice/stack_2"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "1" arg: "-1" arg: "1" arg: "1" }
+ is_variable: false
+}
+operand {
+ name: "strided_slice/stack"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" arg: "0" }
+ is_variable: false
+}
+operand {
+ name: "strided_slice"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 32 }
+ is_variable: false
+}
+operation {
+ type: "StridedSlice"
+ input: "Placeholder"
+ input: "strided_slice/stack"
+ input: "strided_slice/stack"
+ input: "strided_slice/stack_2"
+ output: "strided_slice"
+ strided_slice_options {
+ begin_mask: 15
+ end_mask: 15
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 0
+ }
+}
+input: "Placeholder"
+output: "strided_slice"
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse
diff --git a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
index 8114c50b1..0cfa27e43 100644
--- a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.AddV2(x=lhs_, y=rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
index b9f7a1cc1..5f851cb21 100644
--- a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 5, 4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.BatchMatMulV2(x=lhs_, y=rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
index d28034bf9..b4f0297a3 100644
--- a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
op_uni_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)
diff --git a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
index 99940bf85..995efd5ee 100644
--- a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32)
paddings_ = tf.compat.v1.constant(
np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32))
diff --git a/res/TensorFlowPythonExamples/examples/abs/__init__.py b/res/TensorFlowPythonExamples/examples/abs/__init__.py
index fd5515595..83ac3cb33 100755
--- a/res/TensorFlowPythonExamples/examples/abs/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/abs/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
abs_ = tf.compat.v1.abs(in_)
diff --git a/res/TensorFlowPythonExamples/examples/add/__init__.py b/res/TensorFlowPythonExamples/examples/add/__init__.py
index 7e283f35f..39790a0e5 100755
--- a/res/TensorFlowPythonExamples/examples/add/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/add/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.add(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/add_n/__init__.py b/res/TensorFlowPythonExamples/examples/add_n/__init__.py
index afd068d0d..c8e23c940 100644
--- a/res/TensorFlowPythonExamples/examples/add_n/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/add_n/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
in3_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/argmax/__init__.py b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
index 059df97f9..b8791b46e 100755
--- a/res/TensorFlowPythonExamples/examples/argmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.math.argmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/argmin/__init__.py b/res/TensorFlowPythonExamples/examples/argmin/__init__.py
index f9a54627f..39f3278a5 100644
--- a/res/TensorFlowPythonExamples/examples/argmin/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/argmin/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.math.argmin(in_)
diff --git a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
index 90756b0b0..c430749f3 100644
--- a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
index a8ab0ddc4..814cf5787 100644
--- a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole")
op_ = tf.compat.v1.nn.avg_pool2d(in_, (2, 2), 1, "VALID")
diff --git a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
index e86555220..4a7787073 100644
--- a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
mean = tf.compat.v1.constant([1., 2., 3.])
variance = tf.compat.v1.constant([4., 5., 6.])
offset = tf.compat.v1.constant([7., 8., 9.])
diff --git a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
index 1dd08b0ee..9efa85c2d 100644
--- a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[4, 1, 1, 1], name="Hole")
cr_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.batch_to_space(in_, cr_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
index eb8a69bc3..72ffe10ae 100755
--- a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1, 2, 3), name="Hole")
op_ = tf.nn.bias_add(in_, bias=[1.0, 1.0, -1.0], data_format="NHWC")
diff --git a/res/TensorFlowPythonExamples/examples/cast/__init__.py b/res/TensorFlowPythonExamples/examples/cast/__init__.py
index 4c0adc09f..5919e0de2 100644
--- a/res/TensorFlowPythonExamples/examples/cast/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cast/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
cast_ = tf.cast(in_, tf.int32)
diff --git a/res/TensorFlowPythonExamples/examples/ceil/__init__.py b/res/TensorFlowPythonExamples/examples/ceil/__init__.py
index 5178f8fe8..79737c8ab 100755
--- a/res/TensorFlowPythonExamples/examples/ceil/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/ceil/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.ceil(in_)
diff --git a/res/TensorFlowPythonExamples/examples/concat/__init__.py b/res/TensorFlowPythonExamples/examples/concat/__init__.py
index ec59b242f..c1c7b1aeb 100644
--- a/res/TensorFlowPythonExamples/examples/concat/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/concat/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole1")
in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 4), name="Hole2")
concat_ = tf.compat.v1.concat([in1_, in2_], axis=-2)
diff --git a/res/TensorFlowPythonExamples/examples/cond/__init__.py b/res/TensorFlowPythonExamples/examples/cond/__init__.py
index deafbb162..660ec9b84 100644
--- a/res/TensorFlowPythonExamples/examples/cond/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cond/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX")
y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY")
z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ")
diff --git a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
index fed192018..da8809482 100644
--- a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX")
y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY")
z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ")
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
index fa4f72f99..7cf8dee52 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
index 680bb36fd..812fef12b 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
index 17fd6e20a..cd317cee9 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole")
kernel_ = tf.compat.v1.placeholder(tf.float32, shape=(3, 3, 1, 1), name="Hole")
op_ = tf.compat.v1.nn.conv2d_transpose(
diff --git a/res/TensorFlowPythonExamples/examples/cos/__init__.py b/res/TensorFlowPythonExamples/examples/cos/__init__.py
index cfce5d830..3271ddb96 100755
--- a/res/TensorFlowPythonExamples/examples/cos/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cos/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.cos(in_)
diff --git a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
index 0cbc304fa..c11766ed0 100644
--- a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 1, 1, 4], name="Hole")
op_ = tf.nn.depth_to_space(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
index 7df1938cc..a9c8b33eb 100644
--- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole")
filters = np.array(
diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
index 4800ebd82..8fbd0da49 100644
--- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole")
filters = np.array(
diff --git a/res/TensorFlowPythonExamples/examples/div/__init__.py b/res/TensorFlowPythonExamples/examples/div/__init__.py
index 2887771ff..9acf9166b 100755
--- a/res/TensorFlowPythonExamples/examples/div/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/div/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.div(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/elu/__init__.py b/res/TensorFlowPythonExamples/examples/elu/__init__.py
index b41f65111..91c620927 100755
--- a/res/TensorFlowPythonExamples/examples/elu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/elu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
elu_ = tf.compat.v1.nn.elu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/exp/__init__.py b/res/TensorFlowPythonExamples/examples/exp/__init__.py
index e83638436..5a7c88d8c 100644
--- a/res/TensorFlowPythonExamples/examples/exp/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/exp/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.exp(in_)
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
index ab6a87fc7..1f99c1107 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 1 where input has all known dims and axis is const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
index 36c54753b..1b1626a32 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 2 where input has unknown dim and axis is const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(None, None), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
index 6304c2344..c73b0ba2f 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 3 where input has all known dim and axis is not const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/fill/__init__.py b/res/TensorFlowPythonExamples/examples/fill/__init__.py
index f8413bb36..1c9d20476 100644
--- a/res/TensorFlowPythonExamples/examples/fill/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/fill/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(), name="Hole")
op_ = tf.compat.v1.fill((3, 4), in_)
diff --git a/res/TensorFlowPythonExamples/examples/flatten/__init__.py b/res/TensorFlowPythonExamples/examples/flatten/__init__.py
index bb6dbaa2b..3f135688e 100644
--- a/res/TensorFlowPythonExamples/examples/flatten/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/flatten/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole")
op_ = tf.compat.v1.layers.flatten(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floor/__init__.py b/res/TensorFlowPythonExamples/examples/floor/__init__.py
index 3b3f5bfc3..0357cee3b 100755
--- a/res/TensorFlowPythonExamples/examples/floor/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floor/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.floor(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
index 34f413f2b..5714bf563 100755
--- a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.floordiv(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/floormod/__init__.py b/res/TensorFlowPythonExamples/examples/floormod/__init__.py
index c06e2a9ed..f4e1a5f33 100644
--- a/res/TensorFlowPythonExamples/examples/floormod/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floormod/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.floormod(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
index 5e13b0d82..628420c3b 100644
--- a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
scale = tf.compat.v1.constant([1., 2., 3.])
offset = tf.compat.v1.constant([4., 5., 6.])
mean = tf.constant([1., 2., 3.])
diff --git a/res/TensorFlowPythonExamples/examples/gather/__init__.py b/res/TensorFlowPythonExamples/examples/gather/__init__.py
index 173be4a97..67b4d07fc 100644
--- a/res/TensorFlowPythonExamples/examples/gather/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gather/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 2, 3, 4), name="Hole")
indices_ = tf.constant([1, 2])
op_ = tf.gather(param_, indices_, axis=2)
diff --git a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
index 1ff11d568..8c0df3629 100644
--- a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 2), name="Hole")
indices_ = tf.constant([[0, 1], [1, 0]])
op_ = tf.gather_nd(param_, indices_)
diff --git a/res/TensorFlowPythonExamples/examples/greater/__init__.py b/res/TensorFlowPythonExamples/examples/greater/__init__.py
index e88f57471..b8578e3b2 100755
--- a/res/TensorFlowPythonExamples/examples/greater/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/greater/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.greater(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
index b15fbd324..cf10e4d4e 100755
--- a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.greater_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/gru/__init__.py b/res/TensorFlowPythonExamples/examples/gru/__init__.py
index 26ee75d2e..0d4718937 100755
--- a/res/TensorFlowPythonExamples/examples/gru/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gru/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
from tensorflow import keras
+tf.compat.v1.disable_eager_execution()
+
model = keras.Sequential()
shape = (4, 4)
model.add(keras.layers.GRU(2, input_shape=shape))
diff --git a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
index b44942c39..62a774e4a 100644
--- a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
sess = tf.Session()
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
index 0dda6bfc8..fe26e0684 100644
--- a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
arg = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.l2_normalize(arg)
diff --git a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
index d595edbd0..c1899de56 100755
--- a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.leaky_relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/less/__init__.py b/res/TensorFlowPythonExamples/examples/less/__init__.py
index 41ba18c62..6fee74aa5 100755
--- a/res/TensorFlowPythonExamples/examples/less/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/less/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.less(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
index d60bf2a73..fdca6490a 100755
--- a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.less_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
index eca6b2267..c358bd06e 100644
--- a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 20), name="Hole")
op_ = tf.compat.v1.nn.lrn(x_, 5, 1.0, 1.0, 0.5)
diff --git a/res/TensorFlowPythonExamples/examples/log/__init__.py b/res/TensorFlowPythonExamples/examples/log/__init__.py
index cb206c058..d8787ef7d 100644
--- a/res/TensorFlowPythonExamples/examples/log/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.log(in_)
diff --git a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
index 651888c71..a13f211c7 100644
--- a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.log_softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
index c3d458942..856ebd968 100644
--- a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole")
op_ = tf.compat.v1.nn.log_softmax(in_, axis=1)
diff --git a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
index f546fae9f..d0c4ea2ac 100755
--- a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_and(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
index f1bcc2c8f..532d5ff1f 100755
--- a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_not(in_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
index 991d61ab9..ce584eaf4 100755
--- a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_or(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/lstm/__init__.py b/res/TensorFlowPythonExamples/examples/lstm/__init__.py
index c07948bde..99ef3c27f 100755
--- a/res/TensorFlowPythonExamples/examples/lstm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/lstm/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
from tensorflow import keras
+tf.compat.v1.disable_eager_execution()
+
model = keras.Sequential()
shape = (4, 4)
model.add(keras.layers.LSTM(2, input_shape=shape))
diff --git a/res/TensorFlowPythonExamples/examples/matmul/__init__.py b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
index 760241de7..6f049e50a 100755
--- a/res/TensorFlowPythonExamples/examples/matmul/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 4), name="Hole")
rhs_ = tf.compat.v1.constant(dtype=tf.float32, shape=(4, 4), name="Hole", value=1.0)
op_ = tf.compat.v1.matmul(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
index 43d4d8754..a708f35c4 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.matrix_band_part(in_, 1, -1)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
index 384a29853..cd789eaca 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.matrix_diag(in_)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
index e8878f02f..55b869037 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
diag_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
op_ = tf.compat.v1.matrix_set_diag(in_, diag_)
diff --git a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
index 487858cc5..78daa034c 100755
--- a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 1), name="Hole")
op_ = tf.compat.v1.nn.max_pool_with_argmax(
in_, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding="VALID")
diff --git a/res/TensorFlowPythonExamples/examples/maximum/__init__.py b/res/TensorFlowPythonExamples/examples/maximum/__init__.py
index a96fe03a7..0656ba4e6 100755
--- a/res/TensorFlowPythonExamples/examples/maximum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/maximum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.maximum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/minimum/__init__.py b/res/TensorFlowPythonExamples/examples/minimum/__init__.py
index ef664dbf6..ebd795e38 100755
--- a/res/TensorFlowPythonExamples/examples/minimum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/minimum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.minimum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/multiply/__init__.py b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
index da8885660..68dff1e61 100755
--- a/res/TensorFlowPythonExamples/examples/multiply/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.multiply(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/negative/__init__.py b/res/TensorFlowPythonExamples/examples/negative/__init__.py
index 86713da7b..473dc9b97 100644
--- a/res/TensorFlowPythonExamples/examples/negative/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/negative/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
neg_ = tf.math.negative(in_)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
index b8f010c67..2598b531b 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(4)
in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
index 42e7bf06c..932ad3534 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(6)
iou_threshold = tf.compat.v1.constant(0.5)
score_threshold = tf.compat.v1.constant(0.6)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
index 32c6173b0..c251b9271 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(4)
in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
index 415f9209f..a7185c3ee 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(6)
iou_threshold = tf.compat.v1.constant(0.5)
score_threshold = tf.compat.v1.constant(0.6)
diff --git a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
index 95073fe4a..955eb1f9f 100755
--- a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.not_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
index 49e0346d3..b99bb9ca0 100644
--- a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
indice_ = tf.compat.v1.placeholder(tf.int32, shape=(1, 2, 3, 4), name='Hole')
depth_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole')
on_value_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole')
diff --git a/res/TensorFlowPythonExamples/examples/pack/__init__.py b/res/TensorFlowPythonExamples/examples/pack/__init__.py
index 609bc9b76..4f1c46baa 100755
--- a/res/TensorFlowPythonExamples/examples/pack/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pack/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_1 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
in_2 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
op_ = tf.compat.v1.stack([in_1, in_2])
diff --git a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
index dc877f119..a78e21571 100644
--- a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
paddings_ = tf.constant([[1, 1], [2, 2]], name="Hole")
op_ = tf.pad(tensor_, paddings_, "REFLECT")
diff --git a/res/TensorFlowPythonExamples/examples/pad/__init__.py b/res/TensorFlowPythonExamples/examples/pad/__init__.py
index ac5cf81fa..7097b7592 100755
--- a/res/TensorFlowPythonExamples/examples/pad/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pad/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
paddings_ = tf.compat.v1.constant([[1, 1], [2, 2]], name="Hole")
op_ = tf.compat.v1.pad(tensor_, paddings_)
diff --git a/res/TensorFlowPythonExamples/examples/pow/__init__.py b/res/TensorFlowPythonExamples/examples/pow/__init__.py
index 960032a84..12a19f2b0 100755
--- a/res/TensorFlowPythonExamples/examples/pow/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pow/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.pow(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/prelu/__init__.py b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
index b0e7c7b9d..7e43f5101 100644
--- a/res/TensorFlowPythonExamples/examples/prelu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_tensor = tf.compat.v1.placeholder(
dtype=tf.float32, name="input", shape=[1, 4, 4, 3])
prelu = tf.keras.layers.PReLU(shared_axes=[1, 2])
diff --git a/res/TensorFlowPythonExamples/examples/range/__init__.py b/res/TensorFlowPythonExamples/examples/range/__init__.py
index 0f032e9d1..9b57167b0 100644
--- a/res/TensorFlowPythonExamples/examples/range/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/range/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# this modified example comes from TF API reference
start = 1
limit = 10
diff --git a/res/TensorFlowPythonExamples/examples/rank/__init__.py b/res/TensorFlowPythonExamples/examples/rank/__init__.py
index c9b970718..ab2bc79dc 100644
--- a/res/TensorFlowPythonExamples/examples/rank/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rank/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 3, 3), name="Hole")
rank_ = tf.compat.v1.rank(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
index eb9167f72..2fee752d4 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 4), name="Hole")
op_ = tf.compat.v1.reduce_all(input_, axis=1, keepdims=False)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
index f87c25166..0e87a0c6e 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_any(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
index 27e48df72..dc5e0d648 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_max(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
index b3cf0346a..fe81336d4 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_min(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
index 4d134ae32..9fe2ee295 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_prod(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu/__init__.py b/res/TensorFlowPythonExamples/examples/relu/__init__.py
index a144a1212..69e075332 100755
--- a/res/TensorFlowPythonExamples/examples/relu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/relu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu6/__init__.py b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
index f58ae7c2c..d581d3936 100755
--- a/res/TensorFlowPythonExamples/examples/relu6/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.relu6(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reshape/__init__.py b/res/TensorFlowPythonExamples/examples/reshape/__init__.py
index f451bacb9..c60c0a6d8 100644
--- a/res/TensorFlowPythonExamples/examples/reshape/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reshape/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.reshape(in_, shape=[2, 2, 2, 2])
diff --git a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
index 422bf1db5..773fc07c9 100755
--- a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
op_ = tf.compat.v1.image.resize_bilinear(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
index a14022948..3e688d328 100755
--- a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
op_ = tf.compat.v1.image.resize_nearest_neighbor(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
index aebd4fc50..4b7a9cf26 100755
--- a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 8), name="Hole")
op_ = tf.compat.v1.reverse_sequence(in_, [7, 2, 3, 5], seq_axis=1, batch_axis=0)
diff --git a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
index e6afc995c..0404cd660 100755
--- a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole")
op_ = tf.compat.v1.reverse_v2(in_, [3, 2])
diff --git a/res/TensorFlowPythonExamples/examples/rnn/__init__.py b/res/TensorFlowPythonExamples/examples/rnn/__init__.py
index 5e76951c2..9c1e69c2e 100755
--- a/res/TensorFlowPythonExamples/examples/rnn/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rnn/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
from tensorflow import keras
+tf.compat.v1.disable_eager_execution()
+
model = keras.Sequential()
shape = (4, 4)
model.add(keras.layers.SimpleRNN(2, input_shape=shape))
diff --git a/res/TensorFlowPythonExamples/examples/round/__init__.py b/res/TensorFlowPythonExamples/examples/round/__init__.py
index 9a00ad558..6cda033e2 100755
--- a/res/TensorFlowPythonExamples/examples/round/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/round/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.round(in_)
diff --git a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
index 90500bd11..dc81e48aa 100755
--- a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.rsqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
index e094b5705..0158e3ca6 100644
--- a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
@@ -2,6 +2,8 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
indices = tf.compat.v1.constant([[0], [2]])
updates = tf.compat.v1.constant([[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8,
diff --git a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
index 24d15bb8b..c15746a66 100755
--- a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(4, ), name="Hole")
op_ = tf.compat.v1.math.segment_sum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/shape/__init__.py b/res/TensorFlowPythonExamples/examples/shape/__init__.py
index 4c13a338f..b719eb9fc 100644
--- a/res/TensorFlowPythonExamples/examples/shape/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/shape/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 2, 3), name="Hole")
op_ = tf.compat.v1.shape(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
index 43328f2cb..1749071f0 100755
--- a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.sigmoid(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sin/__init__.py b/res/TensorFlowPythonExamples/examples/sin/__init__.py
index 0bfdcffed..75ea73b85 100644
--- a/res/TensorFlowPythonExamples/examples/sin/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sin/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.sin(in_)
diff --git a/res/TensorFlowPythonExamples/examples/slice/__init__.py b/res/TensorFlowPythonExamples/examples/slice/__init__.py
index 45f9044d1..b734dc22b 100644
--- a/res/TensorFlowPythonExamples/examples/slice/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/slice/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole")
op_ = tf.compat.v1.slice(in_, [1, 0, 0], [1, 1, 3])
diff --git a/res/TensorFlowPythonExamples/examples/softmax/__init__.py b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
index 5b8d1cdfb..3c93e8a2b 100755
--- a/res/TensorFlowPythonExamples/examples/softmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
index e088012e9..b0e3d85ab 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
pd_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.space_to_batch(in_, pd_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
index 760195063..892796b12 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
bs_ = tf.constant([2, 2], name="Hole")
pd_ = tf.constant([[0, 0], [0, 0]], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
index e9bc945bb..e146f6aa3 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
op_ = tf.nn.space_to_depth(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
index 5fe0bc4d0..0ce8f0bdd 100644
--- a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.sparse_placeholder(tf.float32, name="Hole")
op_ = tf.compat.v1.sparse_tensor_to_dense(in_)
diff --git a/res/TensorFlowPythonExamples/examples/split/__init__.py b/res/TensorFlowPythonExamples/examples/split/__init__.py
index 4226f30de..11f542751 100644
--- a/res/TensorFlowPythonExamples/examples/split/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/split/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole")
op_ = tf.compat.v1.split(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/split_2/__init__.py b/res/TensorFlowPythonExamples/examples/split_2/__init__.py
index 03777df15..6212c6e81 100644
--- a/res/TensorFlowPythonExamples/examples/split_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/split_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole")
op_ = tf.compat.v1.split(in_, [1, 2, 1])
diff --git a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
index 4aab5da9c..8e304e80c 100755
--- a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.sqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/square/__init__.py b/res/TensorFlowPythonExamples/examples/square/__init__.py
index 2d03e9b89..f0c3e4410 100644
--- a/res/TensorFlowPythonExamples/examples/square/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/square/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.square(in_)
diff --git a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
index baacf5622..6e86f843d 100755
--- a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.squared_difference(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
index d054f01a2..ba2348c1e 100755
--- a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 1, 4), name="Hole")
op_ = tf.compat.v1.squeeze(in_)
diff --git a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
index 5715bed0e..d6134589a 100755
--- a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 1, 1), name="Hole")
op_ = tf.compat.v1.squeeze(in_, (0, 2))
diff --git a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
index 2d7234df2..a6fa99a75 100644
--- a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole")
op_ = tf.compat.v1.strided_slice(in_, [1, 0, 0], [2, 1, 3], [1, 1, 1])
diff --git a/res/TensorFlowPythonExamples/examples/subtract/__init__.py b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
index feb11b12e..39cdbc3a2 100755
--- a/res/TensorFlowPythonExamples/examples/subtract/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.subtract(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/sum/__init__.py b/res/TensorFlowPythonExamples/examples/sum/__init__.py
index 69297d6a0..14e408ca0 100644
--- a/res/TensorFlowPythonExamples/examples/sum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sum/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 4), name="Hole")
op_ = tf.compat.v1.reduce_sum(in_, -1, True)
diff --git a/res/TensorFlowPythonExamples/examples/tanh/__init__.py b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
index dd202a78d..ccd37579a 100755
--- a/res/TensorFlowPythonExamples/examples/tanh/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.tanh(in_)
diff --git a/res/TensorFlowPythonExamples/examples/tile/__init__.py b/res/TensorFlowPythonExamples/examples/tile/__init__.py
index aad4e73dd..f5d4ef8e4 100755
--- a/res/TensorFlowPythonExamples/examples/tile/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/tile/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
multiples_ = tf.compat.v1.constant([1, 2], name="Hole")
op_ = tf.compat.v1.tile(in_, multiples_)
diff --git a/res/TensorFlowPythonExamples/examples/top_k/__init__.py b/res/TensorFlowPythonExamples/examples/top_k/__init__.py
index e7b823400..05c330630 100644
--- a/res/TensorFlowPythonExamples/examples/top_k/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/top_k/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4], name="Hole")
op_ = tf.compat.v1.math.top_k(in_, k=1)
diff --git a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
index eaeb32ac3..3dde2b9c9 100644
--- a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
op_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)(in_)
diff --git a/res/TensorFlowPythonExamples/examples/unique/__init__.py b/res/TensorFlowPythonExamples/examples/unique/__init__.py
index ad65757d0..00e4f3caf 100644
--- a/res/TensorFlowPythonExamples/examples/unique/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unique/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(6), name="Hole")
op_ = tf.compat.v1.unique(in_)
diff --git a/res/TensorFlowPythonExamples/examples/unstack/__init__.py b/res/TensorFlowPythonExamples/examples/unstack/__init__.py
index e4ffa2119..2a178569f 100644
--- a/res/TensorFlowPythonExamples/examples/unstack/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unstack/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4, 2, 3, 4], name="Hole")
unpack_ = tf.compat.v1.unstack(in_, axis=0)
diff --git a/res/TensorFlowPythonExamples/examples/where/__init__.py b/res/TensorFlowPythonExamples/examples/where/__init__.py
index 69c89c8db..94b747259 100644
--- a/res/TensorFlowPythonExamples/examples/where/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
where_ = tf.compat.v1.where(in_)
diff --git a/res/TensorFlowPythonExamples/examples/where_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_2/__init__.py
index 78c50e0fe..19ad0f2f0 100644
--- a/res/TensorFlowPythonExamples/examples/where_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole")
in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
index de87af72a..b6cc7de9e 100644
--- a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
where_v2_ = tf.compat.v1.where_v2(in_)
diff --git a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
index 4ce17ca11..e3ffe03b7 100644
--- a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[3], name="Hole")
in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 1], name="Hole")
in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1, 3], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/while/__init__.py b/res/TensorFlowPythonExamples/examples/while/__init__.py
index fadaa73e2..15ff4eb65 100644
--- a/res/TensorFlowPythonExamples/examples/while/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
i = tf.compat.v1.constant(0, name="Hole")
c = lambda i: tf.compat.v1.less(i, 10)
diff --git a/res/TensorFlowPythonExamples/examples/while_2/__init__.py b/res/TensorFlowPythonExamples/examples/while_2/__init__.py
index af1c74582..9e26639bf 100644
--- a/res/TensorFlowPythonExamples/examples/while_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
i = tf.constant(0, shape=[1, 0], dtype=tf.int32, name='i')
x = tf.compat.v1.placeholder(shape=[1, 1], dtype=tf.int32, name='Hole')
diff --git a/res/TensorFlowPythonExamples/examples/while_3/__init__.py b/res/TensorFlowPythonExamples/examples/while_3/__init__.py
index 840846e7e..30ce15a1e 100644
--- a/res/TensorFlowPythonExamples/examples/while_3/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while_3/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole')
i = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole_2')
diff --git a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
index 5230bbac6..16414cea2 100755
--- a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 16, 3), name="Hole")
op_ = tf.compat.v1.image.yuv_to_rgb(in_)
diff --git a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
index 7daf85e84..d4080ec43 100644
--- a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.zeros_like(in_)
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index 9a9465072..bc39a09b9 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.20.0"
+ versionName "1.21.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/libs/misc/CMakeLists.txt b/runtime/libs/misc/CMakeLists.txt
index 557d403ec..69d6a9208 100644
--- a/runtime/libs/misc/CMakeLists.txt
+++ b/runtime/libs/misc/CMakeLists.txt
@@ -1,11 +1,22 @@
# Library `nnfw_lib_misc`
-file(GLOB_RECURSE NNFW_UTILITY_SRCS "src/*.cpp")
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS})
+add_library(nnfw_lib_misc STATIC ${SOURCES})
target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_common)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_coverage)
-add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp")
-target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_executable(nnfw_lib_misc_test ${TESTS})
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_lib_misc)
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_coverage)
+target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(nnfw_lib_misc_test nnfw_lib_misc_test)
+install(TARGETS nnfw_lib_misc_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/misc/examples/tensor_index_iterator.cpp b/runtime/libs/misc/examples/tensor_index_iterator.cpp
deleted file mode 100644
index 590b433df..000000000
--- a/runtime/libs/misc/examples/tensor_index_iterator.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/IndexIterator.h"
-
-#include <array>
-
-#include <iostream>
-#include <algorithm>
-
-#include <cassert>
-
-void test_iterate(void)
-{
- const nnfw::misc::tensor::Shape shape{3, 4, 7};
-
- std::array<int, 3 * 4 * 7> array;
-
- array.fill(0);
-
- using nnfw::misc::tensor::Index;
- using nnfw::misc::tensor::iterate;
-
- iterate(shape) << [&](const Index &index) {
- assert(index.rank() == shape.rank());
-
- const uint32_t rank = index.rank();
-
- uint32_t offset = index.at(0);
-
- for (uint32_t axis = 1; axis < rank; ++axis)
- {
- offset *= shape.dim(axis);
- offset += index.at(axis);
- }
-
- array[offset] += 1;
- };
-
- assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
-}
-
-int main(int argc, char **argv)
-{
- test_iterate();
-
- nnfw::misc::tensor::Shape shape{3, 4, 3, 4};
-
- std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl;
-
- nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) {
- std::cout << "rank: " << index.rank() << std::endl;
-
- for (uint32_t d = 0; d < index.rank(); ++d)
- {
- std::cout << " offset(" << d << ") = " << index.at(d) << std::endl;
- }
- };
-
- return 0;
-}
diff --git a/runtime/libs/misc/include/misc/EnvConfigSource.h b/runtime/libs/misc/include/misc/EnvConfigSource.h
new file mode 100644
index 000000000..63c8ae9c0
--- /dev/null
+++ b/runtime/libs/misc/include/misc/EnvConfigSource.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+#define __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+
+#include "GeneralConfigSource.h"
+
+#include <unordered_map>
+
+namespace nnfw
+{
+namespace misc
+{
+
+class EnvConfigSource final : public GeneralConfigSource
+{
+public:
+ std::string get(const std::string &key) const override;
+
+private:
+ std::unordered_map<std::string, std::string> _default_attributes;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/GeneralConfigSource.h b/runtime/libs/misc/include/misc/GeneralConfigSource.h
new file mode 100644
index 000000000..a3de66e81
--- /dev/null
+++ b/runtime/libs/misc/include/misc/GeneralConfigSource.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+#define __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+
+#include "IConfigSource.h"
+
+#include <unordered_map>
+
+namespace nnfw
+{
+namespace misc
+{
+
+class GeneralConfigSource : public IConfigSource
+{
+public:
+ GeneralConfigSource() = default;
+
+ std::string get(const std::string &key) const override;
+ void set(const std::string &key, const std::string &val);
+
+private:
+ std::unordered_map<std::string, std::string> _map;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/IConfigSource.h b/runtime/libs/misc/include/misc/IConfigSource.h
new file mode 100644
index 000000000..fe2c48ecf
--- /dev/null
+++ b/runtime/libs/misc/include/misc/IConfigSource.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_I_CONFIG_SOURCE_H__
+#define __NNFW_MISC_I_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace nnfw
+{
+namespace misc
+{
+
+struct IConfigSource
+{
+ /**
+ * @brief Destroy the IConfigSource object
+ */
+ virtual ~IConfigSource() = default;
+
+ /**
+ * @brief get the value for the matching key
+ *
+ * @param key string key to search
+ * @return string value associated with the key
+ */
+ virtual std::string get(const std::string &key) const = 0;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_I_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/string_helpers.h b/runtime/libs/misc/include/misc/string_helpers.h
index 46fecca71..c9d72034f 100644
--- a/runtime/libs/misc/include/misc/string_helpers.h
+++ b/runtime/libs/misc/include/misc/string_helpers.h
@@ -50,7 +50,7 @@ inline std::vector<std::string> split(const std::string &s, char delim)
std::vector<std::string> elems;
while (std::getline(ss, item, delim))
{
- elems.push_back(std::move(item));
+ elems.push_back(item);
}
return elems;
}
diff --git a/runtime/libs/misc/src/EnvConfigSource.cpp b/runtime/libs/misc/src/EnvConfigSource.cpp
new file mode 100644
index 000000000..3abc9d196
--- /dev/null
+++ b/runtime/libs/misc/src/EnvConfigSource.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/EnvConfigSource.h"
+
+#include <cstdlib>
+
+namespace nnfw
+{
+namespace misc
+{
+
+std::string EnvConfigSource::get(const std::string &key) const
+{
+ const char *value = std::getenv(key.c_str());
+ if (value != nullptr)
+ {
+ return value;
+ }
+ else
+ {
+ return GeneralConfigSource::get(key);
+ }
+}
+
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/libs/misc/src/GeneralConfigSource.cpp b/runtime/libs/misc/src/GeneralConfigSource.cpp
new file mode 100644
index 000000000..298c1663e
--- /dev/null
+++ b/runtime/libs/misc/src/GeneralConfigSource.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/GeneralConfigSource.h"
+
+namespace nnfw
+{
+namespace misc
+{
+
+std::string GeneralConfigSource::get(const std::string &key) const
+{
+ auto itr = _map.find(key);
+ if (itr == _map.end())
+ {
+ return "";
+ }
+ else
+ {
+ return itr->second;
+ }
+}
+
+void GeneralConfigSource::set(const std::string &key, const std::string &val) { _map[key] = val; }
+
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/libs/misc/src/string_helpers.test.cpp b/runtime/libs/misc/src/string_helpers.test.cpp
new file mode 100644
index 000000000..1111425d0
--- /dev/null
+++ b/runtime/libs/misc/src/string_helpers.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/string_helpers.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringHelpersTest, split)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 3);
+ EXPECT_STREQ(str_vector[0].c_str(), "abc");
+ EXPECT_STREQ(str_vector[1].c_str(), "def");
+ EXPECT_STREQ(str_vector[2].c_str(), "ghi");
+}
+
+TEST(StringHelpersTest, neg_split_empty)
+{
+ const std::string example = "";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 0);
+}
+
+TEST(StringHelpersTest, neg_nonsplit)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ':');
+
+ ASSERT_EQ(str_vector.size(), 1);
+ EXPECT_STREQ(str_vector[0].c_str(), example.c_str());
+}
+
+TEST(StringHelpersTest, append)
+{
+ auto append_str = nnfw::misc::str("abc", "-", 1);
+
+ EXPECT_STREQ(append_str.c_str(), "abc-1");
+}
+
+TEST(StringHelpersTest, neg_append_nullstr)
+{
+ const char *null_str = nullptr;
+ auto append_str = nnfw::misc::str(null_str, null_str);
+
+ ASSERT_EQ(append_str.size(), 0);
+}
+
+TEST(StringHelpersTest, join)
+{
+ const std::vector<std::string> example = {"abc", "def", "ghi"};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ EXPECT_STREQ(join_str.c_str(), "abc;def;ghi");
+}
+
+TEST(StringHelpersTest, neg_join_empty)
+{
+ const std::vector<std::string> example = {};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ ASSERT_EQ(join_str.size(), 0);
+}
diff --git a/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
new file mode 100644
index 000000000..4cff6067f
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexEnumerator.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+using nnfw::misc::tensor::Shape;
+using nnfw::misc::tensor::Index;
+using nnfw::misc::tensor::IndexEnumerator;
+
+TEST(MiscIndexEnumeratorTest, iterate_full_range)
+{
+ const uint32_t H = 3;
+ const uint32_t W = 4;
+
+ const Shape shape{H, W};
+
+ std::vector<uint32_t> count;
+
+ count.resize(H * W, 0);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.curr();
+
+ ASSERT_EQ(2, ind.rank());
+ count.at(ind.at(0) * W + ind.at(1)) += 1;
+ }
+
+ ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; }));
+}
+
+TEST(MiscIndexEnumeratorTest, neg_zero_rank_shape)
+{
+ // Test abnormal case of empty shape
+ // It is expected not to throw any exception, do nothing
+ const Shape shape{};
+ IndexEnumerator e{shape};
+ ASSERT_NO_THROW(e.valid());
+ ASSERT_NO_THROW(e.advance());
+ SUCCEED();
+}
diff --git a/runtime/libs/misc/src/tensor/IndexIterator.test.cpp b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
new file mode 100644
index 000000000..875786bdd
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexIterator.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <array>
+
+using namespace nnfw::misc::tensor;
+
+TEST(MiscIndexIteratorTest, iterate)
+{
+ const Shape shape{3, 4, 7};
+
+ std::array<int, 3 * 4 * 7> array;
+
+ array.fill(0);
+
+ iterate(shape) << [&](const Index &index) {
+ assert(index.rank() == shape.rank());
+
+ const uint32_t rank = index.rank();
+
+ uint32_t offset = index.at(0);
+
+ for (uint32_t axis = 1; axis < rank; ++axis)
+ {
+ offset *= shape.dim(axis);
+ offset += index.at(axis);
+ }
+
+ array[offset] += 1;
+ };
+
+ ASSERT_TRUE(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
+}
+
+TEST(MiscIndexIteratorTest, neg_zero_rank_shape)
+{
+ // Test abnormal case of empty shape
+ // It is expected not to throw any exception, do nothing
+ const Shape shape{};
+
+ ASSERT_NO_THROW(iterate(shape) << ([](const Index &index) {}));
+ SUCCEED();
+}
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
index f88f13186..cf8c5208a 100644
--- a/runtime/libs/ndarray/CMakeLists.txt
+++ b/runtime/libs/ndarray/CMakeLists.txt
@@ -3,8 +3,6 @@ add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)
set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(ndarray PUBLIC include)
-#can't make this private because of c++ templates
-target_include_directories(ndarray PUBLIC src)
option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")
@@ -19,5 +17,12 @@ if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
-add_subdirectory(test)
+add_executable(ndarray_test src/Array.test.cpp src/ContiguousSpan.test.cpp)
+target_link_libraries(ndarray_test PRIVATE ndarray)
+target_link_libraries(ndarray_test PRIVATE nnfw_coverage)
+target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(ndarray_test ndarray_test)
+install(TARGETS ndarray_test DESTINATION unittest_standalone)
+
add_subdirectory(example)
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
index 09e791763..568fe1c77 100644
--- a/runtime/libs/ndarray/include/ndarray/Array.h
+++ b/runtime/libs/ndarray/include/ndarray/Array.h
@@ -22,37 +22,21 @@
#include "ContiguousSpan.h"
#include "Shape.h"
-#if __cplusplus < 201402L
-#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions
-#else
-#include <utility>
-#endif
-
#include <algorithm>
-#include <cassert>
-#include <type_traits>
#include <array>
-#include <tuple>
+#include <cassert>
#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
namespace ndarray
{
-// there is no index_sequence before c++14
-#if __cplusplus < 201402L
-
-template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
-
-template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
-
-#else
-
template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
-#endif //__cplusplus < 201402L
-
struct Strides
{
explicit Strides(Shape s) : _strides{} { fillStrides(s); }
diff --git a/runtime/libs/ndarray/src/Array.test.cpp b/runtime/libs/ndarray/src/Array.test.cpp
new file mode 100644
index 000000000..15e67600d
--- /dev/null
+++ b/runtime/libs/ndarray/src/Array.test.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayArrayTests, basic_data_test)
+{
+ float raw_data[] = {1, 2, 3, 4};
+ int32_t raw_data_int[] = {1, 2, 3, 4};
+ uint32_t raw_data_uint[] = {1, 2, 3, 4};
+ int8_t raw_data_int8[] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+ Array<int32_t> data22_int{raw_data_int, {2, 2}};
+ Array<uint32_t> data22_uint{raw_data_uint, {2, 2}};
+ Array<int8_t> data22_int8{raw_data_int8, {2, 2}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
+ ASSERT_EQ(data22.shape().rank(), 2);
+ ASSERT_EQ(data22.shape().dim(0), 2);
+ ASSERT_EQ(data22.shape().dim(1), 2);
+
+ Array<float> data14{raw_data, {1, 4}};
+ ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
+ ASSERT_EQ(data14.shape().rank(), 2);
+ ASSERT_EQ(data14.shape().dim(0), 1);
+ ASSERT_EQ(data14.shape().dim(1), 4);
+
+ // <float, false>
+ {
+ ContiguousSpan<float> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <float, true>
+ {
+ ContiguousSpan<float, true> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_FLOAT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, false>
+ {
+ ContiguousSpan<int32_t> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, true>
+ {
+ ContiguousSpan<int32_t, true> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<int32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <uint32_t, false>
+ {
+ ContiguousSpan<uint32_t> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<uint32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ }
+
+ // <uint32_t, true>
+ {
+ ContiguousSpan<uint32_t, true> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<uint32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int8_t, false>
+ {
+ ContiguousSpan<int8_t> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ // <int8_t, true>
+ {
+ ContiguousSpan<int8_t, true> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t, true>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ Array<float> lv = std::move(data14);
+ ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
+}
+
+TEST(NDArrayArrayTests, slice_write_test)
+{
+ // float
+ {
+ float raw_data[4] = {0};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {0};
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {0};
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {0};
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+}
+
+TEST(NDArrayArrayTests, slice_read_test)
+{
+ // float
+ {
+ float raw_data[4] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_FLOAT_EQ(slice[0], 3);
+ ASSERT_FLOAT_EQ(slice[1], 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+}
+
+TEST(NDArrayArrayTests, multidim_test)
+{
+ // float
+ {
+ float raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+}
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.test.cpp b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
new file mode 100644
index 000000000..dd1108697
--- /dev/null
+++ b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/ContiguousSpan.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayContiguousSpanTests, slice_assign_test)
+{
+ // float
+ {
+ std::vector<float> v1{1, 2, 3, 4, 5};
+ std::vector<float> v2(5);
+
+ ContiguousSpan<float> span1(v1.begin(), v1.end());
+ ContiguousSpan<float> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<float> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<float, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<float, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+
+ ASSERT_EQ(*(span5.data() + 2), *(span5.data() + 2));
+ }
+
+ // int32_t
+ {
+ std::vector<int32_t> v1{1, 2, 3, 4, 5};
+ std::vector<int32_t> v2(5);
+
+ ContiguousSpan<int32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<int32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // uint32_t
+ {
+ std::vector<uint32_t> v1{1, 2, 3, 4, 5};
+ std::vector<uint32_t> v2(5);
+
+ ContiguousSpan<uint32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<uint32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<uint32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<uint32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<uint32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // int8_t
+ {
+ std::vector<int8_t> v1{1, 2, 3, 4, 5};
+ std::vector<int8_t> v2(5);
+
+ ContiguousSpan<int8_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int8_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<int8_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int8_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int8_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+}
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
deleted file mode 100644
index 8b78fb985..000000000
--- a/runtime/libs/ndarray/src/detail/cxx14.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_CXX14_H_
-#define _NDARRAY_CXX14_H_
-
-namespace ndarray
-{
-
-namespace cxx14
-{
-
-template <size_t... Nums> struct index_sequence
-{
- using value_type = size_t;
-
- static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
-};
-
-namespace detail
-{
-
-template <size_t v, typename Seq> struct _append;
-
-template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
-{
- using result = index_sequence<Nums..., v>;
-};
-
-template <size_t Len> struct make_index_sequence
-{
- using result =
- typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
-};
-
-template <> struct make_index_sequence<1>
-{
- using result = index_sequence<0>;
-};
-
-template <> struct make_index_sequence<0>
-{
- using result = index_sequence<>;
-};
-
-} // namespace detail
-
-template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
-
-} // namespace cxx14
-
-} // namespace ndarray
-
-#endif //_NDARRAY_CXX14_H_
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
deleted file mode 100644
index be1ed6510..000000000
--- a/runtime/libs/ndarray/test/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-if(NOT TARGET ndarray)
- return()
-endif()
-
-add_executable(ndarray_test ndarray_test.cpp)
-
-target_link_libraries(ndarray_test PRIVATE ndarray)
-
-nnfw_find_package(GTest)
-if(NOT GTest_FOUND)
- message(STATUS "GTest not avaialble. Skipping NDArray test build")
- return()
-endif(NOT GTest_FOUND)
-
-target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
-
-add_test(ndarray_test ndarray_test)
-install(TARGETS ndarray_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
deleted file mode 100644
index 4b5ad5765..000000000
--- a/runtime/libs/ndarray/test/ndarray_test.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "ndarray/Array.h"
-
-using namespace ndarray;
-
-TEST(NDArray_tests, basic_data_test)
-{
-
- float raw_data[] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
- ASSERT_EQ(data22.shape().rank(), 2);
- ASSERT_EQ(data22.shape().dim(0), 2);
- ASSERT_EQ(data22.shape().dim(1), 2);
-
- Array<float> data14{raw_data, {1, 4}};
- ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
- ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
- ASSERT_EQ(data14.shape().rank(), 2);
- ASSERT_EQ(data14.shape().dim(0), 1);
- ASSERT_EQ(data14.shape().dim(1), 4);
-
- ContiguousSpan<float> cs = data22.flat();
- ASSERT_EQ(cs.size(), 4);
- ASSERT_FLOAT_EQ(cs.at(3), 4);
-
- Array<float> lv = std::move(data14);
- ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
- ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
- ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
- ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
-}
-
-TEST(NDArray_tests, slice_write_test)
-{
- float raw_data[4] = {0};
-
- Array<float> data22{raw_data, {2, 2}};
-
- data22.slice(1) = {1, 2};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
-}
-
-TEST(NDArray_tests, slice_read_test)
-{
- float raw_data[4] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- auto slice = data22.slice(1);
-
- ASSERT_FLOAT_EQ(slice[0], 3);
- ASSERT_FLOAT_EQ(slice[1], 4);
-}
-
-TEST(NDArray_tests, multidim_test)
-{
- float raw_data[5] = {0, 1, 2, 3, 4};
-
- Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
-}
-
-TEST(NDArray_tests, slice_assign_test)
-{
- std::vector<float> v1{1, 2, 3, 4, 5};
- std::vector<float> v2(5);
-
- ContiguousSpan<float> span1(v1.begin(), v1.end());
- ContiguousSpan<float> span2(v2.begin(), v2.end());
-
- span2.assign(span1);
-
- ASSERT_EQ(v1, v2);
- ASSERT_EQ(span1.size(), 5);
- ASSERT_EQ(span2.size(), 5);
-
- ASSERT_EQ(span2.at(2), 3);
- ASSERT_EQ(span2.at(4), 5);
-
- ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
-
- ContiguousSpan<float> span3(span2.offset(1));
- ASSERT_EQ(span3.size(), 4);
- ASSERT_EQ(span3.at(0), 2);
- ASSERT_EQ(span3.at(1), 3);
- ASSERT_EQ(span3.at(2), 4);
- ASSERT_EQ(span3.at(3), 5);
-}
diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt
index 88d52a5bd..3c9ca99da 100644
--- a/runtime/onert/CMakeLists.txt
+++ b/runtime/onert/CMakeLists.txt
@@ -7,9 +7,3 @@ add_subdirectory(frontend)
add_subdirectory(core)
add_subdirectory(api)
add_subdirectory(sample)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-add_subdirectory(test)
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
index beb243a4d..badd5d133 100644
--- a/runtime/onert/api/CMakeLists.txt
+++ b/runtime/onert/api/CMakeLists.txt
@@ -10,6 +10,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE nnfw_lib_misc)
target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
target_link_libraries(${ONERT_DEV} PRIVATE trix_loader)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 6f296a931..658cba4d5 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -193,7 +193,7 @@ typedef struct nnfw_tensorinfo
* And inference is performed after {@link nnfw_run} is invoked.
*
* <p>{@link nnfw_close_session} should be called once
- * if session is no longer need
+ * if session is no longer needed
*
* @param[out] session The session to be created
* @return NNFW_STATUS_NO_ERROR if successful
@@ -213,7 +213,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session);
/**
* @brief Load model from nnpackage file or directory
*
- * The length of \p package_file_path must not execeed 1024 bytes including zero at the end.
+ * The length of \p package_file_path must not exceed 1024 bytes including zero at the end.
*
* @param[in] session nnfw_session loading the given nnpackage file/dir
* @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 45b34716a..2fbb96f31 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001400
+#define NNFW_VERSION 0x01001500
#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index 0ebd385e9..a0e6ee094 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -58,15 +58,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_INFO_ID_VERSION, 0);
* @param session the session to be created
* @return NNFW_STATUS_NO_ERROR if successful
*/
-NNFW_STATUS nnfw_create_session(nnfw_session **session)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
-
- *session = new (std::nothrow) nnfw_session();
- if (*session == nullptr)
- return NNFW_STATUS_OUT_OF_MEMORY;
- return NNFW_STATUS_NO_ERROR;
-}
+NNFW_STATUS nnfw_create_session(nnfw_session **session) { return nnfw_session::create(session); }
/*
* Close a session instance
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 62a043921..9b43dd381 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -25,6 +25,7 @@
#include "tflite_loader.h"
#include "trix_loader.h"
#include "json/json.h"
+#include "ir/NNPkg.h"
#include "ir/OpCode.h"
#include "util/TracingCtx.h"
@@ -110,9 +111,7 @@ std::string trim(const std::string &value)
return value.substr(begin, range);
}
-using CfgKeyValues = std::unordered_map<std::string, std::string>;
-
-bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+bool loadConfigure(const std::string cfgfile, onert::util::CfgKeyValues &keyValues)
{
std::ifstream ifs(cfgfile);
if (ifs.is_open())
@@ -143,19 +142,6 @@ bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
return false;
}
-void setConfigKeyValues(const CfgKeyValues &keyValues)
-{
- auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
-
- for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
- {
- VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
- configsrc->set(it->first, it->second);
- }
-
- onert::util::config_source_ext(std::move(configsrc));
-}
-
NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
{
using onert::ir::DataType;
@@ -195,15 +181,59 @@ void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape,
ti->dtype = datatype_to_nnfw_dtype(dtype);
}
+std::unique_ptr<onert::ir::Model> loadModel(const std::string filename,
+ const std::string model_type)
+{
+ if (model_type == "tflite")
+ return onert::tflite_loader::loadModel(filename.c_str());
+ if (model_type == "circle")
+ return onert::circle_loader::loadModel(filename.c_str());
+ if (model_type == "tvn")
+ return onert::trix_loader::loadModel(filename.c_str());
+
+ std::cerr << "Unsupported model type" << std::endl;
+ return std::unique_ptr<onert::ir::Model>(nullptr);
+}
+
} // namespace
nnfw_session::nnfw_session()
- : _subgraphs{nullptr}, _compiler{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::api::CustomKernelRegistry>()}, _tracing_ctx{nullptr}
+ : _nnpkg{nullptr}, _coptions{}, _compiler_artifact{nullptr}, _execution{nullptr},
+ _kernel_registry{nullptr}
{
// DO NOTHING
}
+NNFW_STATUS nnfw_session::create(nnfw_session **session)
+{
+ if (session == nullptr)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ // Create session
+ *session = new (std::nothrow) nnfw_session();
+ if (*session == nullptr)
+ {
+ std::cerr << "Error during session creation" << std::endl;
+ return NNFW_STATUS_OUT_OF_MEMORY;
+ }
+
+ // Initialize fields
+ try
+ {
+ (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during session initialization : " << e.what() << std::endl;
+ delete *session;
+ *session = nullptr;
+
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
nnfw_session::~nnfw_session() = default;
NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
@@ -219,19 +249,16 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
try
{
- _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ auto model = onert::circle_loader::loadModel(buffer, size);
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -247,45 +274,28 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
}
std::string filename{model_file_path};
- if (filename.size() < 8) // .tflite or .circle
+ // TODO: Use std::filesystem::path when we can use c++17.
+ auto dotidx = filename.find_last_of('.');
+ if (dotidx == std::string::npos)
{
- std::cerr << "Invalid model file path." << std::endl;
+ std::cerr << "Invalid model file path. Please use file with extension." << std::endl;
return NNFW_STATUS_ERROR;
}
-
- std::string model_type = filename.substr(filename.size() - 7, 7);
-
+ std::string model_type = filename.substr(dotidx + 1); // + 1 to exclude dot
try
{
- if (model_type == ".tflite")
- {
- _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
- }
- else if (model_type == ".circle")
- {
- _subgraphs = onert::circle_loader::loadModel(filename.c_str());
- }
- else if (model_type == ".tvn")
- {
- _subgraphs = onert::trix_loader::loadModel(filename.c_str());
- }
- else
- {
- std::cerr << "Unsupported model type" << std::endl;
+ auto model = loadModel(filename, model_type);
+ if (model == nullptr)
return NNFW_STATUS_ERROR;
- }
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -334,45 +344,59 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
auto filepath = package_path + std::string("/metadata/") + configs[0].asString();
- CfgKeyValues keyValues;
+ onert::util::CfgKeyValues keyValues;
if (loadConfigure(filepath, keyValues))
{
- setConfigKeyValues(keyValues);
+ onert::util::setConfigKeyValues(keyValues);
}
}
-
- auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model
- auto model_type = model_types[0].asString(); // first model's type
- if (model_type == "tflite")
+ _nnpkg = std::make_shared<onert::ir::NNPkg>();
+ for (uint32_t i = 0; i < models.size(); ++i)
{
- _subgraphs = onert::tflite_loader::loadModel(model_file_path);
- }
- else if (model_type == "circle")
- {
- _subgraphs = onert::circle_loader::loadModel(model_file_path);
- }
- else if (model_type == "tvn")
- {
- _subgraphs = onert::trix_loader::loadModel(model_file_path);
+ auto model_file_path = package_path + std::string("/") + models[i].asString();
+ auto model_type = model_types[i].asString();
+ auto model = loadModel(model_file_path, model_type);
+ if (model == nullptr)
+ return NNFW_STATUS_ERROR;
+ model->primary_subgraph()->bindKernelBuilder(_kernel_registry->getBuilder());
+ _nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
}
- else
+
+ auto toIODesc = [](std::string str) {
+ auto indices = nnfw::misc::split(str, ':');
+ if (indices.size() != 3)
+ {
+ std::cerr << "IODesc should be 3-tuple." << std::endl;
+ return onert::ir::IODesc{};
+ }
+ auto model_idx = static_cast<uint32_t>(std::stoi(indices.at(0)));
+ auto subgraph_idx = static_cast<uint32_t>(std::stoi(indices.at(1)));
+ auto operand_idx = static_cast<uint32_t>(std::stoi(indices.at(2)));
+ return onert::ir::IODesc{model_idx, subgraph_idx, operand_idx};
+ };
+ // read pkg-inputs and pkg-outputs
+ const Json::Value &pkg_inputs = root["pkg-inputs"];
+ for (uint32_t i = 0; i < pkg_inputs.size(); ++i)
+ _nnpkg->addInput(toIODesc(pkg_inputs[i].asString()));
+ const Json::Value &pkg_outputs = root["pkg-outputs"];
+ for (uint32_t i = 0; i < pkg_outputs.size(); ++i)
+ _nnpkg->addOutput(toIODesc(pkg_outputs[i].asString()));
+ // read model-connect
+ const Json::Value &fromtos = root["model-connect"];
+ for (uint32_t i = 0; i < fromtos.size(); ++i)
{
- std::cerr << "Unsupported model type in MANIFEST" << std::endl;
- return NNFW_STATUS_ERROR;
+ const Json::Value &tos = fromtos[i]["to"];
+ for (uint32_t j = 0; j < tos.size(); ++j)
+ _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
}
- _subgraphs->primary()->bindKernelBuilder(_kernel_registry->getBuilder());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -396,9 +420,17 @@ NNFW_STATUS nnfw_session::prepare()
try
{
- _subgraphs.reset();
- std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
- _execution = std::make_unique<onert::exec::Execution>(executors);
+ // TODO: Compile all models in case of multiple models
+ if (_nnpkg->model_count() > 2)
+ {
+ std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet."
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions);
+ _nnpkg.reset();
+ _compiler_artifact = compiler->compile();
+ _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
}
catch (const std::exception &e)
{
@@ -430,13 +462,14 @@ NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path)
try
{
- _subgraphs.reset();
- std::vector<std::shared_ptr<onert::exec::ExecutorMap>> executor_maps =
- _compiler->compile(_package_file_path.c_str(), map_file_path);
+ auto model = _nnpkg->primary_model();
+ auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]);
+ _nnpkg.reset();
+ auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path);
- for (auto it = executor_maps.begin(); it != executor_maps.end(); ++it)
+ for (auto it = artifacts.begin(); it != artifacts.end(); ++it)
{
- _executions.push_back(std::make_shared<onert::exec::Execution>(*it));
+ _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors));
}
make_dependency();
_threads.resize(_executions.size());
@@ -740,7 +773,8 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
// compilation and excution
- auto primary_subgraph = _subgraphs->primary();
+ auto model = _nnpkg->primary_model();
+ auto primary_subgraph = model->primary_subgraph();
auto ind = primary_subgraph->getInputs().at(index);
auto &input = primary_subgraph->operands().at(ind);
@@ -851,12 +885,12 @@ void nnfw_session::make_dependency()
{
for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++)
{
- auto out_graph = _executions[out_exe]->primary_subgraph();
+ auto &out_graph = _executions[out_exe]->primary_subgraph();
for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++)
{
if (out_exe == in_exe)
continue;
- auto in_graph = _executions[in_exe]->primary_subgraph();
+ auto &in_graph = _executions[in_exe]->primary_subgraph();
for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end();
out++)
{
@@ -971,7 +1005,7 @@ NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
return NNFW_STATUS_ERROR;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -1005,7 +1039,7 @@ NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
return NNFW_STATUS_ERROR;
}
- auto &opcode_to_backend = _compiler->options().manual_scheduler_options.opcode_to_backend;
+ auto &opcode_to_backend = _coptions[0]->manual_scheduler_options.opcode_to_backend;
opcode_to_backend.emplace(onert::ir::toOpCode(key), backend);
}
catch (const std::exception &e)
@@ -1024,7 +1058,7 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -1067,14 +1101,14 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
const onert::ir::Graph *nnfw_session::primary_subgraph()
{
- if (_subgraphs)
+ if (_nnpkg != nullptr)
{
- assert(!_execution && _executions.empty());
- return _subgraphs->primary().get();
+ assert(_execution == nullptr && _executions.empty());
+ return _nnpkg->primary_model()->primary_subgraph().get();
}
else
{
- assert(_execution || !_executions.empty());
+ assert(_execution != nullptr || !_executions.empty());
// TODO Remove const_cast
// We assumed the graph will not change after compilation, but shape could change
if (!_executions.empty())
@@ -1094,7 +1128,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
auto check_boundary = [](size_t dest_size, std::string &src) {
if (dest_size < src.length() + 1 /* for '\0' */)
@@ -1138,9 +1172,9 @@ bool nnfw_session::isStateInitialized()
{
if (_state == State::INITIALIZED)
{
- assert(!_subgraphs);
- assert(!_compiler);
- assert(!_execution && _executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(_coptions.empty());
+ assert(_execution == nullptr && _executions.empty());
return true;
}
else
@@ -1153,9 +1187,9 @@ bool nnfw_session::isStateModelLoaded()
{
if (_state == State::MODEL_LOADED)
{
- assert(_subgraphs);
- assert(_compiler);
- assert(!_execution && _executions.empty());
+ assert(_nnpkg != nullptr);
+ assert(!_coptions.empty());
+ assert(_execution == nullptr && _executions.empty());
return true;
}
else
@@ -1168,9 +1202,9 @@ bool nnfw_session::isStatePrepared()
{
if (_state == State::PREPARED)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
else
@@ -1183,9 +1217,9 @@ bool nnfw_session::isStateRunning()
{
if (_state == State::RUNNING)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
return false;
@@ -1195,9 +1229,9 @@ bool nnfw_session::isStateFinishedRun()
{
if (_state == State::FINISHED_RUN)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
else
@@ -1224,9 +1258,14 @@ NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *i
NNFW_STATUS nnfw_session::set_backends_per_operation(const char *backend_settings)
{
if (backend_settings == NULL)
- {
return NNFW_STATUS_ERROR;
- }
- _compiler->set_backend_from_str(backend_settings);
+
+ if (!isStateModelLoaded())
+ return NNFW_STATUS_INVALID_STATE;
+
+ // Backend for all
+ auto &ms_options = _coptions[0]->manual_scheduler_options;
+ ms_options.setBackendMap(std::string{backend_settings});
+
return NNFW_STATUS_NO_ERROR;
}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 6d75d894f..9b729fd5f 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -20,7 +20,6 @@
#include "nnfw.h"
#include "nnfw_experimental.h"
-#include <util/GeneralConfigSource.h>
#include <util/TracingCtx.h>
#include <string>
@@ -41,11 +40,13 @@ class Execution;
namespace ir
{
class Graph;
-class Subgraphs;
+class Model;
+class NNPkg;
} // namespace ir
namespace compiler
{
-class Compiler;
+struct CompilerArtifact;
+class CompilerOptions;
} // namespace compiler
} // namespace onert
@@ -97,9 +98,18 @@ private:
};
public:
+ /**
+ * @brief Factory method. It creates and initialize nnfw_session
+ *
+ * @note Use factory instead of constructor to get status
+ */
+ static NNFW_STATUS create(nnfw_session **session);
+
+private:
nnfw_session();
- ~nnfw_session();
+public:
+ ~nnfw_session();
NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
NNFW_STATUS prepare();
NNFW_STATUS prepare_pipeline(const char *map_file_path);
@@ -148,6 +158,10 @@ public:
NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+ /**
+ * @brief Set backends with string-encoded mapping from operation index to backend type
+ * (cpu, acl_cl)
+ */
NNFW_STATUS set_backends_per_operation(const char *backend_settings);
private:
@@ -161,15 +175,14 @@ private:
private:
State _state{State::INITIALIZED};
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- std::unique_ptr<onert::compiler::Compiler> _compiler;
+ std::shared_ptr<onert::ir::NNPkg> _nnpkg;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> _coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _compiler_artifact;
std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::vector<std::thread> _threads;
std::vector<std::shared_ptr<onert::exec::Execution>> _executions;
std::string _package_file_path;
-
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 945ad83bb..301ded01f 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -46,8 +46,10 @@ public:
{
const auto &graph = *data.graph;
const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data));
- auto tm = createTensorManager(data.is_linear_executor);
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index 62b163b11..1c7713055 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -46,8 +46,10 @@ public:
{
const auto &graph = *data.graph;
const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data));
- auto tm = createTensorManager(data.is_linear_executor);
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
index b61e58251..99643b983 100644
--- a/runtime/onert/backend/cpu/CMakeLists.txt
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -6,7 +6,7 @@ file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
-target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker nnfw_lib_misc)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index ab0bb5f10..6ed4799a8 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -20,6 +20,8 @@
#include <util/ConfigSource.h>
#include <ruy/context.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 75274dc88..762ee7392 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -244,17 +244,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2255d5e9f..4672fe406 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -63,7 +63,7 @@ void ConvolutionLayer::convFloat32()
getBuffer<float>(_output));
}
-void ConvolutionLayer::convQuant8()
+void ConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -99,7 +99,33 @@ void ConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output));
}
-void ConvolutionLayer::convQuant8PerChannel()
+void ConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), _kernel->data_zero_points().data(), getShape(_bias),
+ getBuffer<int32_t>(_bias), getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQ8i()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -189,11 +215,15 @@ void ConvolutionLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
- convQuant8PerChannel();
+ convQ8i();
}
else
{
@@ -210,8 +240,8 @@ void ConvolutionLayer::prepare()
if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
{
bool is_transposed = false;
- kernel.prepare(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
- is_transposed, _dilationWidthFactor, _dilationHeightFactor);
+ kernel.prepareF32(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
+ is_transposed, _dilationWidthFactor, _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
@@ -225,8 +255,20 @@ void ConvolutionLayer::prepare()
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
!_input->is_dynamic() && !_output->is_dynamic())
{
- kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth,
- _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor,
+ _dilationHeightFactor);
+ }
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 5d7f7c296..9f5253c8e 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -50,9 +50,10 @@ public:
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
- void convQuant8PerChannel();
+ void convQ8i();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index 30641ecae..8a48497d5 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -49,7 +49,7 @@ void DepthwiseConvolutionLayer::convFloat32()
getBuffer<float>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8()
+void DepthwiseConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -84,11 +84,39 @@ void DepthwiseConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8PerChannel()
+void DepthwiseConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.depth_multiplier = _multiplier;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
+ _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
+ getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void DepthwiseConvolutionLayer::convQ8i()
{
if (!_prepared)
{
- prepareQuant8PerChannel();
+ prepareQ8i();
_prepared = true;
}
@@ -119,7 +147,15 @@ void DepthwiseConvolutionLayer::convQuant8PerChannel()
_external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::prepareQuant8PerChannel()
+void DepthwiseConvolutionLayer::prepareQ8i()
+{
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
+}
+
+void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
{
GetQuantizedConvolutionMultipliersAndShifts(
_input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
@@ -155,7 +191,17 @@ void DepthwiseConvolutionLayer::configure(
{
if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
{
- prepareQuant8PerChannel();
+ prepareQ8i();
+ _prepared = true;
+ }
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
+ !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ prepareQ8uPerChannel();
_prepared = true;
}
}
@@ -169,11 +215,15 @@ void DepthwiseConvolutionLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
- convQuant8PerChannel();
+ convQ8i();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index 720550636..5c910109a 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -40,9 +40,10 @@ public:
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
- void convQuant8PerChannel();
+ void convQ8i();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
@@ -55,7 +56,8 @@ public:
void run() override;
private:
- void prepareQuant8PerChannel();
+ void prepareQ8i();
+ void prepareQ8uPerChannel();
private:
const IPortableTensor *_input{nullptr};
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
index 8a6fe6504..d89741c86 100644
--- a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
@@ -121,7 +121,9 @@ Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<fl
assert(box.y2 > box.y1);
}
- return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape());
+ auto decoded_boxes_a_shape = decoded_boxes_a.shape();
+
+ return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a_shape);
}
}
diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h
index 3cc4eaa5a..c73ae636e 100644
--- a/runtime/onert/backend/ruy/ExternalContext.h
+++ b/runtime/onert/backend/ruy/ExternalContext.h
@@ -20,6 +20,8 @@
#include <util/ConfigSource.h>
#include <ruy/context.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
index c2f6a1f79..b2bbf9bfc 100644
--- a/runtime/onert/backend/ruy/KernelGenerator.cc
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -42,17 +42,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt
index 5455757ca..a94be247d 100644
--- a/runtime/onert/backend/trix/CMakeLists.txt
+++ b/runtime/onert/backend/trix/CMakeLists.txt
@@ -1,6 +1,6 @@
set(LIB_ONERT_BACKEND_TRIX onert_backend_trix)
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
if(NOT TRIXEngine_FOUND)
return()
endif(NOT TRIXEngine_FOUND)
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
index 482932fd4..a7dbd7a59 100644
--- a/runtime/onert/backend/trix/DevContext.h
+++ b/runtime/onert/backend/trix/DevContext.h
@@ -32,28 +32,42 @@ public:
DevContext()
{
auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+ // TODO: x64 platform has 3 cores. We do not support more than 2 cores for now.
+ if (device_count > 2)
+ {
+ device_count = 2;
+ }
+
if (device_count <= 0)
{
- throw std::runtime_error("Unable to find TRIV2 NPU device");
+ throw std::runtime_error("Unable to find TRIX NPU device");
}
- // Use NPU 0 device
- if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0)
+ for (int i = 0; i < device_count; i++)
{
- throw std::runtime_error("Failed to get TRIV2 NPU device handle");
+ npudev_h h;
+ if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+ {
+ throw std::runtime_error("Failed to get TRIX NPU device handle");
+ }
+ _dev_handles.push_back(h);
}
}
~DevContext()
{
- if (_dev_handle != nullptr)
+ for (auto h : _dev_handles)
{
- unregisterNPUmodel_all(_dev_handle);
- putNPUdevice(_dev_handle);
+ if (h != nullptr)
+ {
+ unregisterNPUmodel_all(h);
+ putNPUdevice(h);
+ }
}
}
- npudev_h getDev() { return _dev_handle; }
+ npudev_h getDev(int i) { return _dev_handles[i]; }
+ int getDevSize() { return _dev_handles.size(); }
template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
{
@@ -66,14 +80,15 @@ public:
}
}
- template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors)
+ template <typename T>
+ void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index)
{
buf->num_buffers = static_cast<uint32_t>(tensors.size());
for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
{
- buf->bufs[idx].addr = tensors[idx]->buffer();
- buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+ buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size);
+ buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size);
buf->bufs[idx].type = BUFFER_MAPPED;
}
}
@@ -106,9 +121,8 @@ private:
}
private:
- // NPU device handle
- // TODO Support multicore npu device
- npudev_h _dev_handle;
+ // NPU device handles
+ std::vector<npudev_h> _dev_handles;
};
} // namespace trix
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
index 71fdf3f0d..3c49da9a3 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.cc
+++ b/runtime/onert/backend/trix/ops/BulkLayer.cc
@@ -18,6 +18,7 @@
#include <util/logging.h>
#include <libnpuhost.h>
+#include <future>
namespace onert
{
@@ -49,24 +50,56 @@ void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
throw std::runtime_error("Unable to extract the model metadata");
}
+ _model_id.resize(_dev_context->getDevSize());
+
generic_buffer model_file;
model_file.type = BUFFER_FILE;
model_file.filepath = binary_path.c_str();
model_file.size = _meta->size;
- if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0)
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
+ {
+ if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0)
+ {
+ throw std::runtime_error("Failed to register npu model");
+ }
+ }
+}
+
+void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info,
+ output_buffers *output_buf, tensors_data_info *out_info)
+{
+ if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info))
+ {
+ throw std::runtime_error("Unable to create NPU request for req_id (" + std::to_string(req_id) +
+ ")");
+ }
+
+ if (submitNPU_request(dev, req_id))
{
- throw std::runtime_error("Failed to register npu model");
+ throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+ ")");
}
}
void BulkLayer::run()
{
- int req_id;
- if (createNPU_request(_dev_context->getDev(), _model_id, &req_id))
+ // TODO: Remove too many assumptions
+ // We assume the user wants batch execution if the user's input size is a multiple of the model's input size
+ int user_input_batch = (_inputs[0]->get_info().shape()).dim(0);
+ int model_input_batch = _meta->input_seg_dims[0][0];
+ int batch_size = user_input_batch / model_input_batch;
+ bool is_batch_execution = (batch_size != 1 ? true : false);
+
+ std::vector<int> req_id(_dev_context->getDevSize());
+
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
{
- throw std::runtime_error("Unable to create NPU request with model id (" +
- std::to_string(_model_id) + ")");
+ if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i]))
+ {
+ throw std::runtime_error("Unable to create NPU request with model id (" +
+ std::to_string(_model_id[i]) + ")");
+ }
}
if (_meta->input_seg_num != _inputs.size())
@@ -84,28 +117,58 @@ void BulkLayer::run()
_dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
_dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
- input_buffers input_buf;
- output_buffers output_buf;
- _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs);
- _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs);
+ std::vector<input_buffers> input_buf;
+ std::vector<output_buffers> output_buf;
+ input_buf.resize(_dev_context->getDevSize());
+ output_buf.resize(_dev_context->getDevSize());
+
+ std::vector<std::future<void>> f(_dev_context->getDevSize());
- if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf,
- &out_info))
+ const int num_cores = _dev_context->getDevSize();
+ if (is_batch_execution)
{
- throw std::runtime_error("Unable to create NPU request for model id (" +
- std::to_string(_model_id) + ")");
+ // TODO: Support a general number of cores (>2)
+ // Here we assume that there are 2 trix cores
+ for (int i = 0; i < (batch_size); i = i + num_cores)
+ {
+ for (int core = 0; core < num_cores; core++)
+ {
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size,
+ i + core);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core);
+ }
+ for (int core = 0; core < num_cores; core++)
+ {
+
+ if (i + core < batch_size)
+ {
+ f[core] =
+ std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core],
+ &input_buf[core], &in_info, &output_buf[core], &out_info);
+ }
+ }
+ for (int core = 0; core < num_cores; core++)
+ {
+ f[core].wait();
+ }
+ }
}
-
- if (submitNPU_request(_dev_context->getDev(), req_id))
+ else
{
- throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
- ")");
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0);
+
+ single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0],
+ &out_info);
}
- if (removeNPU_request(_dev_context->getDev(), req_id))
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
{
- throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
- ")");
+ if (removeNPU_request(_dev_context->getDev(i), req_id[i]))
+ {
+ throw std::runtime_error("Unable to remove NPU request with req id (" +
+ std::to_string(req_id[i]) + ")");
+ }
}
}
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
index f7080ccad..614c0f728 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.h
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -50,7 +50,7 @@ private:
std::vector<const IPortableTensor *> _inputs;
std::vector<IPortableTensor *> _outputs;
- uint32_t _model_id;
+ std::vector<uint32_t> _model_id;
npubin_meta *_meta;
std::shared_ptr<DevContext> _dev_context;
};
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
index 28f729d77..9580bec8c 100644
--- a/runtime/onert/backend/xnnpack/KernelGenerator.cc
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -56,17 +56,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
index 6dbadf80b..87c7a13e4 100644
--- a/runtime/onert/core/CMakeLists.txt
+++ b/runtime/onert/core/CMakeLists.txt
@@ -6,14 +6,18 @@ nnfw_find_package(Ruy REQUIRED)
add_library(onert_core SHARED ${SOURCES})
set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# NOTE
+# We publish public headers into developer package.
+# To avoid mistake using private header in public header, do not define
+# private target_include_directories scope for src/ directory.
target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-target_link_libraries(onert_core PUBLIC nnfw_lib_misc half)
-target_link_libraries(onert_core PRIVATE nnfw_lib_cker)
+
+target_link_libraries(onert_core PRIVATE jsoncpp half)
+target_link_libraries(onert_core PRIVATE nnfw_lib_misc nnfw_lib_cker)
target_link_libraries(onert_core PRIVATE nnfw_common)
target_link_libraries(onert_core PRIVATE nnfw_coverage)
target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD})
-target_link_libraries(onert_core PRIVATE jsoncpp)
target_link_libraries(onert_core PRIVATE ruy)
target_link_libraries(onert_core INTERFACE ruy_instrumentation)
@@ -48,6 +52,8 @@ set(TEST_ONERT_CORE test_onert_core)
add_executable(${TEST_ONERT_CORE} ${TESTS})
target_link_libraries(${TEST_ONERT_CORE} onert_core)
+# Requires linking nnfw_coverage: check header coverage
+target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage)
target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
index 0a4d9c814..560416264 100644
--- a/runtime/onert/core/include/backend/ITensor.h
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -20,6 +20,7 @@
#include <cstring>
#include <cstdint>
#include <functional>
+#include <stdexcept>
#include "ir/DataType.h"
#include "ir/Layout.h"
diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
index 58bfe3406..cf2da4c34 100644
--- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -103,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_ind : order)
+ for (const auto &op_ind : order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
@@ -161,7 +161,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
}
}
- for (auto ind : operands_last_until_end)
+ for (auto &ind : operands_last_until_end)
{
tensor_builder->notifyLastUse(ind);
}
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index befe40022..b44fcf836 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -17,12 +17,11 @@
#ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__
#define __ONERT_COMPILER_BACKEND_MANAGER_H__
-#include <memory>
-#include <map>
-
-#include "ir/Operands.h"
#include "backend/Backend.h"
-#include "backend/builtin/Backend.h"
+#include "ir/Operands.h"
+
+#include <map>
+#include <memory>
namespace onert
{
@@ -41,7 +40,7 @@ public:
public:
backend::Backend *get(const std::string &key);
const backend::Backend *get(const std::string &key) const;
- const backend::builtin::Backend *getBuiltin() const;
+ const backend::Backend *getBuiltin() const;
const std::vector<const backend::Backend *> getAll() const
{
std::vector<const backend::Backend *> v;
@@ -65,7 +64,7 @@ private:
private:
std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
- backend::builtin::Backend *_builtin{nullptr};
+ backend::Backend *_builtin{nullptr};
/**
* @brief load builtin backend
*
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 292de4b12..f05d63c66 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -22,8 +22,8 @@
#ifndef __ONERT_COMPILER_COMPILE_H_
#define __ONERT_COMPILER_COMPILE_H_
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
+#include "ir/NNPkg.h"
+#include "exec/Executors.h"
#include "util/TracingCtx.h"
namespace onert
@@ -40,6 +40,10 @@ enum class State
struct ManualSchedulerOptions
{
+public:
+ void setBackendMap(const std::string &str);
+
+public:
std::string backend_for_all;
std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
@@ -50,8 +54,14 @@ struct PartialGraphOptions
std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph;
};
-struct CompilerOptions
+class CompilerOptions
{
+public:
+ // Set default values for CompilerOptions
+ // All these default values should not be fetched from Env, when we stop supporting Android NNAPI.
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+public:
// GENERAL OPTIONS
std::vector<std::string> backend_list;
@@ -65,75 +75,85 @@ struct CompilerOptions
bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
bool fp16_enable; //< Whether fp16 mode ON/OFF
PartialGraphOptions partial_graph_options;
-
- util::TracingCtx *tracing_ctx; //< Profiling information
};
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
+struct CompilerArtifact
+{
+ CompilerArtifact(void) = delete;
+ CompilerArtifact(std::shared_ptr<exec::Executors> executors,
+ std::unique_ptr<const util::TracingCtx> tracing_ctx)
+ : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
+
+ std::shared_ptr<exec::Executors> _executors;
+ std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
/**
- * @brief Class to compile graph model
+ * @brief Class to compile NN package
*/
class Compiler
{
public:
/**
- * @brief Construct a new Compiler object
- * @param[in] subgs All subgraphs of a model
- * @param[in] tracing_ctx Profiling information
+ * @brief Construct a new Compiler object for single model
+ * @param[in] model model to compile
+   * @param[in] copt Compiler Options
+ */
+ Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt);
+
+ /**
+ * @brief Construct a new Compiler object for NN package
+ * @param[in] nnpkg NN package to compile
+   * @param[in] copts Compiler option vector for each model in package
*/
- Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
+ Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
public:
/**
* @brief Do compilation with the options
*
- * @return std::shared_ptr<exec::ExecutorMap> Executors as a result of compilation
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
*/
- std::shared_ptr<exec::ExecutorMap> compile(void);
+ std::shared_ptr<CompilerArtifact> compile(void);
/**
* @brief Do compilation with the options
*
- * @return std::vector<std::shared_ptr<exec::ExecutorMap>> Executors as a result of compilation
+ * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation
* for pipeline
*/
- std::vector<std::shared_ptr<exec::ExecutorMap>> compile(const char *package_file_path,
- const char *map_file_path);
+ std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path,
+ const char *map_file_path);
State state(void) const { return _state; }
- CompilerOptions &options() { return _options; }
-
/**
* @brief Allow to compute float32 using float16 data type
*/
void enableToFp16();
/**
- * @brief Set backends from string-encoded mappings from operation index to backend type (cpu,
- * acl_cl)
- */
- void set_backend_from_str(const char *backend_settings);
-
- /**
* @brief Build the partial graphs to compile with original graph
*/
bool buildPartialGraph(uint32_t num_graphs);
private:
void checkProfilerConditions();
- std::shared_ptr<ir::Graph> &primary_subgraph() { return _subgraphs->at(ir::SubgraphIndex{0}); }
+ std::shared_ptr<ir::Graph> &primary_subgraph()
+ {
+ return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
+ }
private:
- std::shared_ptr<ir::Subgraphs> _subgraphs;
+ std::shared_ptr<ir::NNPkg> _nnpkg;
// NOTE These executors does not have duplicated subgraph. This mean they do not allow support
// subgraphs being called recursively because data of non-constant tensor of parent executor will
// be updated by child executor. If you want to support subgraphs being called recursively, you
// have to add allocate non-constant tensor memory of executors in execution time when each
// subgraph is called.
State _state;
- CompilerOptions _options;
+ std::vector<CompilerOptions *> _voptions;
};
} // namespace compiler
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index 10ca8e9fc..7264f2a10 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -60,9 +60,14 @@ public:
private:
void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
- void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+ void lowerGraph(const compiler::CompilerOptions &options);
private:
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses copy of graph, not reference.
+ * It allows the original graph can be compiled multiple times.
+ */
ir::Graph _graph;
ir::Graph _parent_graph;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index b2272e262..f701dc207 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -28,6 +28,36 @@ namespace onert
{
namespace compiler
{
+/**
+ * @brief Class that observe and update operands.
+ */
+class OperandObserver
+{
+public:
+ /**
+ * @brief Constructor of OperandObserver
+ *
+ * @param operands Operands to be updated
+ */
+ OperandObserver(const std::vector<ir::Operand *> &operands) : _operands{operands} {}
+ /**
+ * @brief Destructor of OperandObserver
+ */
+ virtual ~OperandObserver() = default;
+
+public:
+ /**
+ * @brief Update Shape and some OperandInfo of operands
+ *
+   * @param changed_operands_info Operand info to apply to the observed operands
+ * @param unpredictable Whether runtime can predict shapes of operands in compilation time
+ */
+ void updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable = false);
+
+private:
+ std::vector<ir::Operand *> _operands;
+};
/**
* @brief Class to infer shape before running kernels. It does the following:
@@ -38,32 +68,42 @@ namespace compiler
class StaticShapeInferer : public ir::OperationVisitor
{
public:
- StaticShapeInferer(
- const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
- : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
- _operations(lowered_subgs.at(subg_idx)->graph().operations()),
- _return_has_dynamic_tensor(false)
- { /* empty */
+ StaticShapeInferer(compiler::LoweredGraph *lowered_subg)
+ : _lowered_subg{lowered_subg}, _subg_input_observers{}, _controlflow_output_observer{nullptr},
+ _child_inferers{}
+ {
}
virtual ~StaticShapeInferer() = default;
public:
+ void appendSubgInputObserver(const ir::SubgraphIndex &subg_idx,
+ std::unique_ptr<OperandObserver> &&subg_input_observer) noexcept
+ {
+ _subg_input_observers[subg_idx] = std::move(subg_input_observer);
+ }
+
+ void setControlflowOutputObserver(std::unique_ptr<OperandObserver> &&output_observer) noexcept
+ {
+ _controlflow_output_observer = std::move(output_observer);
+ }
+
+ void appendChildInferer(const ir::SubgraphIndex &subg_idx, compiler::StaticShapeInferer *inferer)
+ {
+ _child_inferers[subg_idx] = inferer;
+ }
+
/**
- * @brief Infer shape of operands beloning to ops and set the output shape.
+ * @brief Infer shape of operands belonging to ops and set the output shape.
* If output shape cannot be known without running op, mark it so that it can be allocated
* when running kernel.
- * @param op Operation
- * @return @c true if op's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::Operation &op);
+ void infer(void);
void dump();
private:
- void inferSubgraph(ir::SubgraphIndex subg_ind);
bool checkDynamicInput(const ir::Operation &op);
+ bool checkDynamicOutput(const ir::Operation &op);
void setDynamicOutput(const ir::Operation &op);
private:
@@ -113,6 +153,7 @@ private:
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
void visit(const ir::operation::DetectionPostProcess &op) override;
+ void visit(const ir::operation::Bulk &op) override;
private:
/**
@@ -128,12 +169,11 @@ private:
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &_lowered_subgs;
- // _operands and _operations can be changed by controlflow operation
- ir::Operands &_operands; // operands of current subgraph
- ir::Operations &_operations; // operations of current subgraph
- bool _return_has_dynamic_tensor;
+ compiler::LoweredGraph *_lowered_subg;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<OperandObserver>>
+ _subg_input_observers; // child subg input
+ std::unique_ptr<OperandObserver> _controlflow_output_observer; // parent controlflow op output
+ std::unordered_map<ir::SubgraphIndex, compiler::StaticShapeInferer *> _child_inferers;
};
} // namespace compiler
diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h
index b0a5cced3..1e8083c4c 100644
--- a/runtime/onert/core/include/exec/Execution.h
+++ b/runtime/onert/core/include/exec/Execution.h
@@ -22,7 +22,7 @@
#define __ONERT_EXEC_EXECUTION_H__
#include "ir/Layout.h"
-#include "exec/IExecutor.h"
+#include "exec/Executors.h"
#include "IODescription.h"
#include <thread>
@@ -46,7 +46,7 @@ public:
* @brief Construct a new Execution object
* @param[in] executor Model executor
*/
- Execution(const std::shared_ptr<ExecutorMap> &executors);
+ Execution(const std::shared_ptr<Executors> &executors);
public:
/**
@@ -250,7 +250,7 @@ private:
std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
private:
- const std::shared_ptr<ExecutorMap> _executors;
+ const std::shared_ptr<Executors> _executors;
IODescription _io_desc;
std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs;
sem_t _async_io_descs_sem;
diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h
new file mode 100644
index 000000000..5adb0eda4
--- /dev/null
+++ b/runtime/onert/core/include/exec/Executors.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "IExecutor.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors
+{
+public:
+ Executors(void) = default;
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); }
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+
+ // TODO Use Executor index
+ void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec)
+ {
+ _executors.emplace(idx, std::move(exec));
+ }
+
+ std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); }
+
+ uint32_t inputSize() const;
+
+ uint32_t outputSize() const;
+
+ const ir::OperandInfo inputInfo(const ir::IOIndex &index);
+
+ const ir::OperandInfo outputInfo(const ir::IOIndex &index);
+
+ void execute(const IODescription &desc);
+
+private:
+ void executeEntries(const IODescription &desc);
+
+private:
+ // TODO Use Executor index
+  // Changing index will affect if/while compile and kernel implementation
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+ // NOTE _model_edges may use different struct type for executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
index cf3f2a882..7ff6d8b8c 100644
--- a/runtime/onert/core/include/exec/FunctionSequence.h
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -75,8 +75,7 @@ public:
public: // methods related to dynamic tensor
struct DynamicTensorCtx
{
- ir::OperationIndex op_ind;
- const ir::Operations *operations = nullptr;
+ const ir::Operation *op = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
};
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index adc68074f..bb5b5af98 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -107,8 +107,6 @@ struct IExecutor
virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0;
};
-using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
-
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index 7a7688334..286caf72f 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -20,9 +20,9 @@
#include <functional>
#include <unordered_map>
+#include "ir/Model.h"
#include "ir/Operands.h"
#include "ir/Operations.h"
-#include "ir/Subgraphs.h"
namespace onert
{
@@ -50,7 +50,9 @@ private:
};
public:
- Graph(void);
+ explicit Graph(void);
+ explicit Graph(const Graph &);
+
~Graph(void);
// Graph Building
@@ -87,10 +89,9 @@ public:
void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
void setLayout(Layout layout) { _layout = layout; }
- void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; }
- void setPartialgraphs(const std::shared_ptr<Subgraphs> &partialgraphs)
+ void setPartialModel(const std::shared_ptr<Model> &partial_model)
{
- _partialgraphs = partialgraphs;
+ _partialgraphs = partial_model;
}
void
setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names)
@@ -134,27 +135,25 @@ public:
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
const Operations &operations() const { return _operations; }
Operations &operations() { return _operations; }
- const std::shared_ptr<Subgraphs> &subgraphs() const { return _subgraphs; }
- std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; }
Layout layout() const { return _layout; }
- std::shared_ptr<Subgraphs> &partialgraphs() { return _partialgraphs; }
+ std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; }
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names()
{
return _tensor_names;
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_input_begin()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const
{
return _name_to_input.begin();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_input_end()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const
{
return _name_to_input.end();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_output_begin()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const
{
return _name_to_output.begin();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_output_end()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const
{
return _name_to_output.end();
}
@@ -172,13 +171,11 @@ private:
OperandIndexSequence _outputs;
std::unordered_map<std::string, IOIndex> _name_to_input;
std::unordered_map<std::string, IOIndex> _name_to_output;
- // Child subgraphs
- std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
Layout _layout{Layout::NHWC};
- // Partial Graphs
- std::shared_ptr<ir::Subgraphs> _partialgraphs;
+ // model for partial graphs
+ std::shared_ptr<ir::Model> _partialgraphs;
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
};
diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h
index e01b090f3..f01a4c84d 100644
--- a/runtime/onert/core/include/ir/Index.h
+++ b/runtime/onert/core/include/ir/Index.h
@@ -38,6 +38,9 @@ using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
struct SubgraphIndexTag;
using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+struct ModelIndexTag;
+using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>;
+
template <typename IndexType>
std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
{
@@ -64,7 +67,12 @@ inline std::ostream &operator<<(std::ostream &o, const IOIndex &i)
inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i)
{
- return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph
+ return _index_print_impl(o, "SUBGRAPH", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const ModelIndex &i)
+{
+ return _index_print_impl(o, "MODEL", i);
}
} // namespace ir
diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h
index 082810172..0cdbcc2c8 100644
--- a/runtime/onert/core/include/ir/Layout.h
+++ b/runtime/onert/core/include/ir/Layout.h
@@ -18,6 +18,7 @@
#define __ONERT_IR_LAYOUT_H__
#include <functional>
+#include <stdexcept>
#include <string>
namespace onert
diff --git a/runtime/onert/core/include/ir/Model.h b/runtime/onert/core/include/ir/Model.h
new file mode 100644
index 000000000..c3c0d87b8
--- /dev/null
+++ b/runtime/onert/core/include/ir/Model.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_MODEL_H__
+#define __ONERT_IR_MODEL_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "ir/Index.h"
+#include "util/ObjectManager.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class Graph;
+
+class Model
+{
+public:
+ Model() = default;
+ Model(const Model &obj) = default;
+ Model(Model &&) = default;
+ Model &operator=(const Model &) = default;
+ Model &operator=(Model &&) = default;
+ ~Model() = default;
+
+ /**
+ * @brief Put subgraph in the container with a new Index for that
+ *
+   * @param[in] index Index of subgraph to be pushed
+   * @param[in] subg Subgraph to be pushed
+   * @return N/A
+ */
+ void push(SubgraphIndex index, const std::shared_ptr<Graph> &subg) { _subgraphs[index] = subg; }
+
+ /**
+ * @brief Remove the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be removed
+ * @return N/A
+ */
+ void remove(const SubgraphIndex &index) { _subgraphs.erase(index); }
+
+ /**
+ * @brief Get the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be returned
+ * @return Graph
+ */
+ const std::shared_ptr<Graph> &at(const SubgraphIndex &index) const
+ {
+ return _subgraphs.at(index);
+ }
+ /**
+ * @brief Get the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be returned
+ * @return Graph
+ */
+ std::shared_ptr<Graph> &at(const SubgraphIndex &index) { return _subgraphs.at(index); }
+
+ /**
+   * @brief Check whether a subgraph associated with the given index exists
+   *
+   * @param[in] index Index of the subgraph to be checked
+ * @return true if such entry exists otherwise false
+ */
+ bool exist(const SubgraphIndex &index) const
+ {
+ auto it = _subgraphs.find(index);
+ return it != _subgraphs.end();
+ }
+
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const SubgraphIndex &, const Graph &)> &fn) const
+ {
+ for (const auto &e : _subgraphs)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const SubgraphIndex &, Graph &)> &fn)
+ {
+ for (const auto &e : _subgraphs)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+ /**
+ * @brief Get count of Subgraphs
+ *
+ * @return count of Subgraphs
+ */
+ size_t subgraphs_count() const { return _subgraphs.size(); }
+
+ /**
+ * @brief Return the primary subgraph
+ *
+ * @return std::shared_ptr<Graph> Primary subgraph
+ */
+ std::shared_ptr<Graph> primary_subgraph() const { return _subgraphs.at(SubgraphIndex{0}); }
+
+private:
+ std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_MODEL_H__
diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h
new file mode 100644
index 000000000..d9f825e85
--- /dev/null
+++ b/runtime/onert/core/include/ir/NNPkg.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_NNPKG_H__
+#define __ONERT_IR_NNPKG_H__
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/Model.h"
+
+namespace onert
+{
+namespace ir
+{
+
+using IODesc = std::tuple<ModelIndex, SubgraphIndex, IOIndex>;
+
+struct ModelEdge
+{
+ IODesc from;
+ IODesc to;
+};
+
+struct ModelEdgeEqual
+{
+ bool operator()(const onert::ir::ModelEdge &lhs, const onert::ir::ModelEdge &rhs) const
+ {
+ return lhs.from == rhs.from && lhs.to == rhs.to;
+ }
+};
+
+struct ModelEdgeHash
+{
+ size_t operator()(const ::onert::ir::ModelEdge &edge) const noexcept
+ {
+ unsigned long long h1 = (std::get<0>(edge.from).value() << 24) |
+ (std::get<1>(edge.from).value() << 16) | std::get<2>(edge.from).value();
+ unsigned long long h2 = (std::get<0>(edge.to).value() << 24) |
+ (std::get<1>(edge.to).value() << 16) | std::get<2>(edge.to).value();
+ return h1 + h2;
+ }
+};
+
+inline std::ostream &operator<<(std::ostream &o, const IODesc &od)
+{
+ o << std::get<0>(od).value() << ":" << std::get<1>(od).value() << ":" << std::get<2>(od).value();
+ return o;
+}
+
+using ModelEdgeSet = std::unordered_set<ir::ModelEdge, ir::ModelEdgeHash, ir::ModelEdgeEqual>;
+
+/**
+ * @brief Struct to gather model I/O information in multimodel NN package
+ * Model I/O will have role one of below
+ * - Package input/output
+ * - Edge's start/finish point between model
+ */
+struct ModelEdges
+{
+ std::vector<ir::IODesc> pkg_inputs;
+ std::vector<ir::IODesc> pkg_outputs;
+ ModelEdgeSet edges;
+};
+
+class NNPkg
+{
+public:
+ NNPkg() = default;
+ NNPkg(const NNPkg &obj) = default;
+ NNPkg(NNPkg &&) = default;
+ NNPkg &operator=(const NNPkg &) = default;
+ NNPkg &operator=(NNPkg &&) = default;
+ ~NNPkg() = default;
+
+ NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
+ std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); }
+
+ /**
+ * @brief Put model at index
+ *
+   * @param[in] index Index where Model is to be pushed
+   * @param[in] model Model to be pushed
+ */
+ void push(ModelIndex index, const std::shared_ptr<Model> &model) { _models[index] = model; }
+
+ /**
+ * @brief Get the count of model
+ *
+ * @return the count of models
+ */
+ size_t model_count() const { return _models.size(); }
+
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ const std::shared_ptr<Model> &model(const ModelIndex &index) const { return _models.at(index); }
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ std::shared_ptr<Model> &model(const ModelIndex &index) { return _models.at(index); }
+
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ const IODesc &input(uint32_t index) const { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ IODesc &input(uint32_t index) { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Add input at the end
+ *
+ * @param[in] input Input IODesc to be pushed
+ */
+ void addInput(const IODesc &input) { _edges.pkg_inputs.push_back(input); }
+
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ const IODesc &output(uint32_t index) const { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ IODesc &output(uint32_t index) { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Add output at the end
+ *
+ * @param[in] output Output IODesc to be pushed
+ */
+ void addOutput(const IODesc &output) { _edges.pkg_outputs.push_back(output); }
+
+ /**
+ * @brief Add edge between models at the end
+ *
+ * @param[in] from from IODesc
+ * @param[in] to to IODesc
+ */
+ void addEdge(const IODesc &from, const IODesc &to)
+ {
+    std::cout << from << " -> " << to << std::endl; // FIXME(review): leftover debug print; header does not include <iostream>
+ _edges.edges.insert(ModelEdge{from, to});
+ }
+ /**
+ * @brief Get model edge set
+ * @return Edge set reference
+ */
+ const ModelEdges &model_edges() { return _edges; }
+
+ // TODO: Add iterate() or getter for edges
+
+private:
+ std::unordered_map<ModelIndex, std::shared_ptr<Model>> _models;
+ ModelEdges _edges;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_NNPKG_H__
diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h
deleted file mode 100644
index 6cb369447..000000000
--- a/runtime/onert/core/include/ir/Subgraphs.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_SUBGRAPHS_H__
-#define __ONERT_IR_SUBGRAPHS_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/Index.h"
-#include "util/ObjectManager.h"
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-
-class Subgraphs
-{
-public:
- Subgraphs() = default;
- Subgraphs(const Subgraphs &obj) = default;
- Subgraphs(Subgraphs &&) = default;
- Subgraphs &operator=(const Subgraphs &) = default;
- Subgraphs &operator=(Subgraphs &&) = default;
- ~Subgraphs() = default;
-
- /**
- * @brief Put subgraph in the container with a new Index for that
- *
- * @param[in] subg Subgraph to be pushed
- * @param[in] index Index of subgraph to be pushed
- * @return Created
- */
- void push(SubgraphIndex index, const std::shared_ptr<Graph> &subg) { _subgraphs[index] = subg; }
-
- /**
- * @brief Remove the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be removed
- * @return N/A
- */
- void remove(const SubgraphIndex &index) { _subgraphs.erase(index); }
-
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return Graph
- */
- const std::shared_ptr<Graph> &at(const SubgraphIndex &index) const
- {
- return _subgraphs.at(index);
- }
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return Graph
- */
- std::shared_ptr<Graph> &at(const SubgraphIndex &index) { return _subgraphs.at(index); }
-
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return true if such entry exists otherwise false
- */
- bool exist(const SubgraphIndex &index) const
- {
- auto it = _subgraphs.find(index);
- return it != _subgraphs.end();
- }
-
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const SubgraphIndex &, const Graph &)> &fn) const
- {
- for (const auto &e : _subgraphs)
- {
- fn(e.first, *e.second);
- }
- }
-
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const SubgraphIndex &, Graph &)> &fn)
- {
- for (const auto &e : _subgraphs)
- {
- fn(e.first, *e.second);
- }
- }
-
- /**
- * @brief Get count of Subgraphs
- *
- * @return count of Subgraphs
- */
- size_t count() const { return _subgraphs.size(); }
-
- /**
- * @brief Return the primary subgraph
- *
- * @return std::shared_ptr<Graph> Primary sugraph
- */
- std::shared_ptr<Graph> primary() const { return _subgraphs.at(SubgraphIndex{0}); }
-
-private:
- std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_SUBGRAPHS_H__
diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h
index 0a00da5fd..3c5062795 100644
--- a/runtime/onert/core/include/ir/TypeInfo.h
+++ b/runtime/onert/core/include/ir/TypeInfo.h
@@ -50,11 +50,7 @@ public:
public:
DataType type() const { return _type; }
- float scale() const
- {
- assert(_quant.scales.size() == 1);
- return _quant.scales[0];
- }
+ float scale() const { return _quant.scales[0]; }
const std::vector<float> &scales() const { return _quant.scales; }
int32_t zero_point() const
{
diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h
index 1825f7fad..3c20f392f 100644
--- a/runtime/onert/core/include/ir/operation/Bulk.h
+++ b/runtime/onert/core/include/ir/operation/Bulk.h
@@ -32,6 +32,8 @@ public:
struct Param
{
std::string binary_path;
+ std::vector<ir::Shape> origin_input_shapes;
+ std::vector<ir::Shape> origin_output_shapes;
};
public:
diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h
index db76f9dde..4369ca53e 100644
--- a/runtime/onert/core/include/util/CalculateActivationRange.h
+++ b/runtime/onert/core/include/util/CalculateActivationRange.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+#include <limits>
+
#include "ir/InternalType.h"
namespace onert
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 89a9a6ac2..4bbc02ac3 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,7 +20,7 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;bcq") // FIXME Remove bcq
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h
index da8bc8620..d53b8106d 100644
--- a/runtime/onert/core/include/util/ConfigSource.h
+++ b/runtime/onert/core/include/util/ConfigSource.h
@@ -17,17 +17,17 @@
#ifndef __ONERT_UTIL_CONFIG_SOURCE_H__
#define __ONERT_UTIL_CONFIG_SOURCE_H__
-#include <memory>
-
-#include "IConfigSource.h"
+#include <string>
+#include <unordered_map>
namespace onert
{
namespace util
{
-void config_source(std::unique_ptr<IConfigSource> &&source);
-void config_source_ext(std::unique_ptr<IConfigSource> &&source);
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+void setConfigKeyValues(const CfgKeyValues &keyValues);
bool toBool(const std::string &val);
int toInt(const std::string &val);
diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/onert/core/include/util/EnvConfigSource.h
deleted file mode 100644
index 8c5d0e8e9..000000000
--- a/runtime/onert/core/include/util/EnvConfigSource.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/GeneralConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-class EnvConfigSource final : public GeneralConfigSource
-{
-public:
- std::string get(const std::string &key) const override;
-
-private:
- std::unordered_map<std::string, std::string> _default_attributes;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/onert/core/include/util/GeneralConfigSource.h
deleted file mode 100644
index dedc820ec..000000000
--- a/runtime/onert/core/include/util/GeneralConfigSource.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/IConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-class GeneralConfigSource : public IConfigSource
-{
-public:
- GeneralConfigSource() = default;
-
- std::string get(const std::string &key) const override;
- void set(const std::string &key, const std::string &val);
-
-private:
- std::unordered_map<std::string, std::string> _map;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/onert/core/include/util/IConfigSource.h
deleted file mode 100644
index 07b09848a..000000000
--- a/runtime/onert/core/include/util/IConfigSource.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_I_CONFIG_SOURCE_H__
-
-#include <string>
-
-namespace onert
-{
-namespace util
-{
-
-struct IConfigSource
-{
- /**
- * @brief Destroy the IConfigSource object
- */
- virtual ~IConfigSource() = default;
-
- /**
- * @brief get the value for the matching key
- *
- * @param key string key to search
- * @return string value associated with the key
- */
- virtual std::string get(const std::string &key) const = 0;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h
index a493789fa..36b6c85c8 100644
--- a/runtime/onert/core/include/util/ObjectManager.h
+++ b/runtime/onert/core/include/util/ObjectManager.h
@@ -17,14 +17,13 @@
#ifndef __ONERT_UTIL_OBJECT_MANAGER_H__
#define __ONERT_UTIL_OBJECT_MANAGER_H__
-#include <unordered_map>
-#include <memory>
-#include <list>
-#include <functional>
+#include "util/logging.h"
+#include <cassert>
+#include <functional>
+#include <list>
#include <memory>
-
-#include "util/logging.h"
+#include <unordered_map>
namespace onert
{
@@ -208,7 +207,7 @@ public:
l.push_back(e.first);
}
- for (auto index : l)
+ for (auto &index : l)
{
fn(index, *_objects[index]);
}
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
index 334257d87..da284d2fb 100644
--- a/runtime/onert/core/include/util/TracingCtx.h
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -19,7 +19,7 @@
#include "ir/Graph.h"
#include "ir/Index.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
#include <unordered_map>
#include <mutex>
@@ -37,29 +37,9 @@ class TracingCtx
public:
/**
* @brief Create and store unique session id managed by this class
- * Note that this constructor can be called by multiple sessions running in parallely.
- * Use this constructor only when there is only one subgraph in a model.
+ * @note This constructor can be called by multiple sessions running in parallel.
*/
- TracingCtx(const ir::Graph *primary_subgraph)
- {
- decideSessionID();
- _subgraph_indices.emplace(primary_subgraph, 0);
- }
-
- /**
- * @brief Create and store unique session id managed by this class
- * Note that this constructor can be called by multiple sessions running in parallely.
- */
- TracingCtx(const onert::ir::Subgraphs *subgraphs)
- {
- assert(subgraphs);
-
- decideSessionID();
-
- auto count = subgraphs->count();
- for (size_t i = 0; i < count; i++)
- _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
- }
+ TracingCtx(void) { decideSessionID(); }
uint32_t getSessionId() const { return _session_id; }
diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h
index e67be988d..390dbb579 100644
--- a/runtime/onert/core/src/backend/builtin/ExternalContext.h
+++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h
@@ -24,6 +24,8 @@
#include <ruy/ctx.h>
#include <ruy/tune.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
index 3d6358d9d..fa2fc0b94 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
@@ -16,12 +16,10 @@
#include "KernelGenerator.h"
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
+#include "kernel/WhileLayer.h"
+
#include "exec/FunctionSequence.h"
namespace onert
@@ -35,12 +33,12 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
- _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{
- external_context}
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{
+ external_context}
{
UNUSED_RELEASE(_graph);
UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
+ UNUSED_RELEASE(_executors);
}
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
@@ -48,20 +46,16 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_dyn_tensor_manager);
assert(_tensor_reg);
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
auto ret = std::make_unique<exec::FunctionSequence>();
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_graph.operations().at(ind);
+ dyn_ctx->dynamic_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
@@ -90,12 +84,12 @@ void KernelGenerator::visit(const ir::operation::If &node)
output_tensors.emplace_back(output_tensor);
}
- // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
+ // IfLayer just set Executors instead of then and else executor to avoid complexity of
// creating executor recusively
const auto cond_tensor = input_tensors.front();
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
_external_context);
_return_fn = std::move(fn);
@@ -136,10 +130,10 @@ void KernelGenerator::visit(const ir::operation::While &node)
output_tensors.emplace_back(output_tensor);
}
- // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
+ // WhileLayer just set Executors instead of cond and body executor to avoid complexity of
// creating executor recusively
auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors,
_dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
index 00ad962b9..d5931ca26 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
-#include "exec/IExecutor.h"
+#include "DynamicTensorManager.h"
#include "ExternalContext.h"
-#include "ir/Graph.h"
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/basic/KernelGeneratorBase.h"
#include "TensorRegistry.h"
+#include "../../compiler/TensorRegistries.h"
+
+#include "backend/basic/KernelGeneratorBase.h"
+#include "exec/Executors.h"
+#include "ir/Graph.h"
namespace onert
{
@@ -43,10 +44,10 @@ public:
{
_tensor_registries = tensor_registries;
}
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ void setExecutors(const std::shared_ptr<exec::Executors> &executors)
{
// FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
+ _executors = executors.get();
}
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
@@ -64,7 +65,7 @@ private:
DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
index fdd9d9d14..cdb41960a 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
@@ -16,10 +16,6 @@
#include "IfLayer.h"
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include "PermuteLayer.h"
-
namespace onert
{
namespace backend
@@ -33,13 +29,13 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
+ exec::Executors *executors,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
- _executor_map{executor_map}, _external_context{external_context}
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
+ _external_context{external_context}
{
- // At this point, executor_map may not have executors of then subg and else subg
+ // At this point, executors may not have executors of then subg and else subg
}
void IfLayer::run()
@@ -65,12 +61,12 @@ void IfLayer::run()
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executor_map->at(_then_subg_index).get();
+ subg_exec = _executors->at(_then_subg_index).get();
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executor_map->at(_else_subg_index).get();
+ subg_exec = _executors->at(_else_subg_index).get();
}
subg_exec->execute(_input_tensors, _output_tensors);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
index f12ef3605..fa5537a67 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
+#include <exec/Executors.h>
#include "../ExternalContext.h"
namespace onert
@@ -37,8 +37,7 @@ public:
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context);
+ exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
@@ -49,7 +48,7 @@ private:
const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
index 20cd87ad1..ddaecdf57 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -16,9 +16,9 @@
#include "PermuteLayer.h"
-#include "exec/ShapeConverter.h"
+#include "../../../exec/ShapeConverter.h"
-#include "ruy/context.h" // from @ruy
+#include <ruy/context.h> // from @ruy
namespace onert
{
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
index ac5470e85..227e32434 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -17,10 +17,10 @@
#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
#include "../ExternalContext.h"
-#include "ruy/thread_pool.h" // from @ruy
+#include "../../../exec/IPermuteFunction.h"
+
+#include <ruy/thread_pool.h> // from @ruy
namespace onert
{
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
index 81b4a6378..8e006c5ea 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
@@ -16,11 +16,12 @@
#include "WhileLayer.h"
-#include <algorithm>
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
#include "PermuteLayer.h"
+#include "../../../exec/ExecutorBase.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <algorithm>
namespace onert
{
@@ -34,14 +35,14 @@ namespace kernel
WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ const ir::SubgraphIndex &body_subg_index, exec::Executors *executors,
basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
_dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
{
- // At this point, executor_map may not have executors of cond subg and body subg
+ // At this point, executors may not have executors of cond subg and body subg
}
void WhileLayer::run()
@@ -56,8 +57,8 @@ void WhileLayer::run()
// // Run cond subg
// If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = _executor_map->at(_cond_subg_index).get();
- auto body_exec = _executor_map->at(_body_subg_index).get();
+ auto cond_exec = _executors->at(_cond_subg_index).get();
+ auto body_exec = _executors->at(_body_subg_index).get();
// Need a temp tensor to hold the cond subgraph output
assert(cond_exec->getOutputTensors().size() == 1);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
index 912102781..8551b3d09 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
+#include <exec/Executors.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
@@ -41,7 +41,7 @@ public:
WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager,
+ exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
@@ -52,7 +52,7 @@ private:
const ir::SubgraphIndex _body_subg_index;
const std::vector<backend::IPortableTensor *> _input_tensors;
const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index 0d6051b21..44442c065 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -16,16 +16,11 @@
#include "compiler/BackendManager.h"
-#include <memory>
-#include <dlfcn.h>
+#include "../backend/builtin/Backend.h"
+#include "../backend/builtin/Config.h"
-#include "backend/Backend.h"
-#include "backend/builtin/Backend.h"
-#include "backend/builtin/Config.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
+#include <dlfcn.h>
+#include <memory>
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
@@ -152,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const
return nullptr;
}
-const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; }
+const backend::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 6a1d8fcec..7be9c1e3b 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -18,29 +18,27 @@
#include "ExecutorFactory.h"
#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../backend/builtin/Config.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../interp/InterpExecutor.h"
+#include "../ir/OperationCloner.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
-#include <backend/builtin/Config.h>
-#include "compiler/BackendManager.h"
-#include "compiler/IScheduler.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
#include "compiler/StaticShapeInferer.h"
-#include "compiler/OperationLowerInfo.h"
-#include "compiler/pass/ConstantOutputPass.h"
-#include "compiler/pass/OddOutputPass.h"
-#include "compiler/pass/PassRunner.h"
-#include "compiler/pass/UnusedOperandEliminationPass.h"
-#include "exec/ExecTime.h"
-#include "ir/verifier/Verifier.h"
-#include "dumper/dot/DotDumper.h"
-#include "compiler/Linear.h"
-#include "interp/InterpExecutor.h"
#include "util/ConfigSource.h"
#include "util/logging.h"
-#include "ir/OperationDumper.h"
-#include "ir/OperationCloner.h"
-#include "misc/string_helpers.h"
-#include "json/json.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <misc/string_helpers.h>
+#include <json/json.h>
+
+// TODO Remove using fstream header
+#include <fstream>
namespace
{
@@ -86,8 +84,104 @@ void verboseOptions(compiler::CompilerOptions &options)
<< std::noboolalpha;
}
-void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs,
- const std::string &str)
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>>
+createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers;
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get());
+ }
+
+ // Append observers in all StaticShapeInferers
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+ // TODO: Change this iteration for all to controlflow iteration
+ lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &,
+ const ir::Operation &op) {
+ // A Function to append child inferers. These make it possible for a StaticShapeInferer to
+ // call StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+ // A Function to append subg input observers. This makes it possible for a StaticShapeInferer
+ // to update inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<compiler::OperandObserver>(child_subg_inputs));
+ };
+
+ // A Function to set controlflow output observers. This makes it possible for a
+ // StaticShapeInferer to update outputs of parent controlflow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+ const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
+
+} // namespace
+
+namespace onert
+{
+
+namespace compiler
+{
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
{
// TODO Support multiple subgraphs for manual scheduling
auto key_val_list = nnfw::misc::split(str, ';');
@@ -102,37 +196,24 @@ void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgr
const auto &key_str = key_val.at(0);
const auto &val = key_val.at(1);
auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
+ this->index_to_backend.emplace(ir::OperationIndex{key}, val);
}
}
-} // namespace
-
-namespace onert
-{
-
-namespace compiler
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
{
-
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
-{
- CompilerOptions options;
- options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.executor = util::getConfigString(util::config::EXECUTOR);
- options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-
+ auto o = std::make_unique<CompilerOptions>();
+ o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+ o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ o->executor = util::getConfigString(util::config::EXECUTOR);
+ o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
+ o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
{
// Backend for all
- auto &ms_options = options.manual_scheduler_options;
+ auto &ms_options = o->manual_scheduler_options;
// Default value for op_backend_all is first element in the backend list
ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
@@ -151,54 +232,67 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
// Index to Backend
auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- setBackendMap(ms_options, subgs, map_str);
+ ms_options.setBackendMap(map_str);
}
- return options;
+ return o;
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
- : _subgraphs{subgs}, _state{State::CREATED}
+Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
+ : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt}
{
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NN
- // API.
- _options = fetchCompilerOptionsFromGlobalConfig(*subgs);
-
- _options.tracing_ctx = tracing_ctx;
+ // DO NOTHING
}
-void Compiler::enableToFp16() { _options.fp16_enable = true; }
+Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{}
+{
+ for (uint32_t i = 0; i < copts.size(); i++)
+ {
+ _voptions.push_back(copts[i].get());
+ }
+}
-void Compiler::set_backend_from_str(const char *backend_settings)
+void Compiler::enableToFp16()
{
- assert(_subgraphs != nullptr);
- // Backend for all
- auto &ms_options = _options.manual_scheduler_options;
- setBackendMap(ms_options, *_subgraphs, std::string{backend_settings});
+ for (auto options : _voptions)
+ options->fp16_enable = true;
}
void Compiler::checkProfilerConditions()
{
- if (!_options.he_scheduler)
+ if (_nnpkg->model_count() != 1)
+ throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+ auto &options = *_voptions[0];
+
+ if (options.he_scheduler)
throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
- if (_options.executor != "Dataflow")
+ if (options.executor != "Dataflow")
throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
bool Compiler::buildPartialGraph(uint32_t num_graphs)
{
- if (_subgraphs->count() > 1)
+ // Use 1st model and options only on partial graph (pipeline) compile
+ assert(_nnpkg->model_count() == 1);
+ assert(_voptions.size() == 1);
+
+ auto model = _nnpkg->primary_model();
+ auto &options = *_voptions[0];
+
+ if (model->subgraphs_count() > 1)
return false;
- auto partialgraphs = std::make_shared<ir::Subgraphs>();
+ auto partialgraphs = std::make_shared<ir::Model>();
for (uint32_t idx = 0; idx < num_graphs; idx++)
{
auto partialgraph = std::make_unique<ir::Graph>();
partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph));
}
- _subgraphs->primary()->setPartialgraphs(partialgraphs);
+ model->primary_subgraph()->setPartialModel(partialgraphs);
auto partial_graph = primary_subgraph()->partialgraphs();
@@ -208,8 +302,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
for (auto use_operation : use_operations)
{
- auto graph_index = _options.partial_graph_options.index_to_graph.find(use_operation);
- if (graph_index == _options.partial_graph_options.index_to_graph.end())
+ auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation);
+ if (graph_index == options.partial_graph_options.index_to_graph.end())
{
throw std::runtime_error("Invalid Partition Map");
}
@@ -230,8 +324,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
primary_subgraph()->operations().iterate(
[&](const ir::OperationIndex &operation_index, const ir::Operation &operation) {
- auto graph_index = _options.partial_graph_options.index_to_graph.find(operation_index);
- if (graph_index == _options.partial_graph_options.index_to_graph.end())
+ auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index);
+ if (graph_index == options.partial_graph_options.index_to_graph.end())
{
throw std::runtime_error("Invalid Partition Map");
}
@@ -259,7 +353,7 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
assert(new_operation_index == operation_index);
});
- for (uint32_t idx = 0; idx < partial_graph->count(); idx++)
+ for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++)
{
auto partition = partial_graph->at(ir::SubgraphIndex{idx});
@@ -282,10 +376,10 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
auto use_operations = primary_subgraph()->operands().at(operand_index).getUses();
auto iter = use_operations.begin();
ir::SubgraphIndex graph_index =
- _options.partial_graph_options.index_to_graph.find(*iter++)->second;
+ options.partial_graph_options.index_to_graph.find(*iter++)->second;
while (iter != use_operations.end())
{
- if (graph_index != _options.partial_graph_options.index_to_graph.find(*iter)->second &&
+ if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second &&
!partition->getOutputs().contains(operand_index))
{
partition->addOutput(operand_index,
@@ -344,96 +438,157 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
return true;
}
-std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
+std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- // Set control flow backend for control flow operators
+ for (auto options : _voptions)
{
+ // Set control flow backend for control flow operators
auto &builtin_id = backend::builtin::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- }
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- // FIXME This is a workaround for bcq operations, should remove it
- {
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ // FIXME This is a workaround for bcq operations, should remove it
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+ // FIXME This is a workaround for bulk operations, should remove it
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+
+ verboseOptions(*options);
}
- verboseOptions(_options);
+ // NYI: allow one model compilation
+ auto const model_count = _nnpkg->model_count();
+ if (model_count != _voptions.size())
+ throw std::runtime_error{"Model count and option vector size mismatch"};
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
+ for (uint32_t i = 0; i < model_count; i++)
+ {
+ _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
- // Optimizations
- pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
- });
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
+ }
/***************************************************
* Prepare compilation phase
***************************************************/
- auto executors = std::make_shared<exec::ExecutorMap>();
-
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (_options.disable_compile)
+ if (_voptions[0]->disable_compile)
{
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ if (model_count > 1)
+ throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"};
+
+ auto executors = std::make_shared<exec::Executors>();
+
+ _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
});
_state = State::COMPILED;
- return executors;
+ return std::make_shared<CompilerArtifact>(executors, nullptr);
}
// Mode check
- if (_options.he_profiling_mode)
+ // TODO handle option for each model
+ if (_voptions[0]->he_profiling_mode)
checkProfilerConditions();
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
+
+ // Model edge context
+ std::unique_ptr<ir::ModelEdges> model_edges = nullptr;
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
- dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
- // Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
+ if (model_count == 1)
+ {
+ _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value()));
+ // Lower: Assign backend
+ lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]);
+ // Set tracing_ctx for copied graph
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value());
+ });
+ }
+ else
+ {
+ // TODO Support tracing_ctx for multiple model
+ tracing_ctx = nullptr;
+
+ // Copy model edge context
+ model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
- subg.setSubgraphs(nullptr);
- });
+ for (uint32_t i = 0; i < model_count; i++)
+ {
+ auto model = _nnpkg->model(ir::ModelIndex{i});
+ if (model->subgraphs_count() != 1)
+ throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"};
+ auto subg = model->primary_subgraph();
+ dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i));
+
+ // For multimodel, model index is used for lowered graph index in lowered graph map
+ // and index type is SubgraphIndex
+ // TODO Find better way to represent lowered graph index for multimodel's subgraph
+ lowered_subgs[ir::SubgraphIndex{i}] =
+ std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]);
+ }
+ }
- _subgraphs.reset();
+ _nnpkg.reset();
for (auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
auto &lowered_subg = pair.second;
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value()));
}
// Shape inference.
{
- const auto primary_subg_idx = ir::SubgraphIndex{0};
- StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- auto &lowered_subg = lowered_subgs.at(primary_subg_idx);
- auto ordered_ops = lowered_subg->graph().topolSortOperations();
- for (auto op_ind : ordered_ops)
+ // Run the StaticShapeInferer of primary subg. All child StaticShapeInferers are called
+ // recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
+ if (model_count == 1)
{
- const auto &op = lowered_subg->graph().operations().at(op_ind);
- bool has_dynamic_tensor = inferer.infer(op);
- lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair : inferers)
+ {
+ const auto inferer = pair.second.get();
+ inferer->dump();
+ }
+ }
+ else
+ {
+ // Assume each model of a multi-model package has only one subgraph
+ for (const auto &pair : inferers)
+ {
+ const auto inferer = pair.second.get();
+ inferer->infer();
+ inferer->dump();
+ }
}
- inferer.dump();
}
// Shape validation
@@ -452,8 +607,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
/*************************************************************
* Backend independent analysis & optimization phase finished
*************************************************************/
-
- executors = std::make_shared<exec::ExecutorMap>();
+ auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
for (auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
@@ -464,24 +618,31 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+
+ auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0];
+ auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
+ std::move(lowered_subg), tracing_ctx.get(), options, executors)};
executor->setIndexedRanks(indexed_ranks);
- executors->insert(std::make_pair(subg_index, std::move(executor)));
+ executors->emplace(subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
_state = State::COMPILED;
- return executors;
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
-std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *package_file_path,
- const char *map_file_path)
+std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path,
+ const char *map_file_path)
{
- std::vector<std::shared_ptr<exec::ExecutorMap>> executors;
- auto executor_map = std::make_shared<exec::ExecutorMap>();
+ // Allow one model compilation for pipeline
+ if (_nnpkg->model_count() != 1)
+ throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."};
+ assert(_voptions.size() == 1);
+
+ auto model = _nnpkg->primary_model();
+ auto &options = *_voptions[0];
std::string package_path(package_file_path);
std::string partition_map_file;
@@ -508,7 +669,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
num_graphs = np.asUInt();
for (uint32_t i = 0; i < (uint32_t)map.size(); ++i)
{
- _options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
+ options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
ir::SubgraphIndex{map[i].asUInt()};
}
}
@@ -525,25 +686,25 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
// Set control flow backend for control flow operators
{
auto &builtin_id = backend::builtin::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
}
// FIXME This is a workaround for bcq operations, should remove it
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
}
- // It doesn't support tracing in case of partial graph
+ // FIXME This is a workaround for bulk operations, should remove it
{
- _options.tracing_ctx = nullptr;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
}
- verboseOptions(_options);
+ verboseOptions(options);
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
auto part = subg.partialgraphs();
part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) {
@@ -566,38 +727,41 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (_options.disable_compile)
+ if (options.disable_compile)
{
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executor_map->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- executors.push_back(executor_map);
+ std::vector<std::shared_ptr<CompilerArtifact>> results;
+ auto executors = std::make_shared<exec::Executors>();
+
+ model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
});
+ results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
_state = State::COMPILED;
- return executors;
+ return results;
}
// Mode check
- if (_options.he_profiling_mode)
+ if (options.he_profiling_mode)
checkProfilerConditions();
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper_part(dump_level);
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
lowered_partialgraphs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
auto part = subg.partialgraphs();
part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) {
- onert::dumper::dot::DotDumper dot_dumper_part(partialgraph, dump_level);
- dot_dumper_part.dump(nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
+ dot_dumper_part.dump(partialgraph,
+ nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
// // Lower: Assign backend
lowered_partialgraphs[pindex] =
- std::make_unique<compiler::LoweredGraph>(subg, partialgraph, _options);
- partialgraph.setSubgraphs(nullptr);
+ std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options);
});
});
@@ -606,25 +770,20 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
const auto &partialgraph_index = pair.first;
auto &lowered_partialgraph = pair.second;
- onert::dumper::dot::DotDumper dot_dumper_lowered_part(lowered_partialgraph.get(), dump_level);
- dot_dumper_lowered_part.dump("after_lower_subg_partialgraph-" +
- std::to_string(partialgraph_index.value()));
+ dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" +
+ std::to_string(partialgraph_index.value()));
}
// Partial Graph shape inference
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_partialgraphs);
+ // NOTE If partialgraph has subgraphs StaticShapeInferer may be called multiple times
for (auto &pair : lowered_partialgraphs)
{
const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
- StaticShapeInferer partial_inferer(partialgraph_index, lowered_partialgraphs);
- auto ordered_ops = lowered_partialgraph->graph().topolSortOperations();
- for (auto op_ind : ordered_ops)
- {
- const auto &op = lowered_partialgraph->graph().operations().at(op_ind);
- bool has_dynamic_tensor = partial_inferer.infer(op);
- lowered_partialgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
- }
- partial_inferer.dump();
+ const auto partial_inferer = inferers.at(partialgraph_index).get();
+ partial_inferer->infer();
+ partial_inferer->dump();
}
// Shape validation
@@ -652,9 +811,11 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph)));
}
+ std::vector<std::shared_ptr<CompilerArtifact>> results;
for (auto &pair : ordered)
{
- executor_map = std::make_shared<exec::ExecutorMap>();
+ auto executors = std::make_shared<exec::Executors>();
+
const auto &partialgraph_index = ir::SubgraphIndex(pair.first);
auto &lowered_partialgraph = pair.second;
auto indexed_ranks = lowered_partialgraph->indexed_ranks();
@@ -663,19 +824,21 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
lowered_partialgraph->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_partialgraph), _options, executor_map)};
+ ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)};
executor->setIndexedRanks(indexed_ranks);
- executor_map->insert(std::make_pair(ir::SubgraphIndex{0}, std::move(executor)));
- executors.push_back(executor_map);
+ executors->emplace(ir::SubgraphIndex{0}, std::move(executor));
+
+ // It doesn't support tracing in case of partial graph
+ results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
}
- _subgraphs.reset();
+ _nnpkg.reset();
/********************************
* Code generation phase finished
********************************/
_state = State::COMPILED;
- return executors;
+ return results;
}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index f9db1ca89..024556e7e 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,23 +16,22 @@
#include "ExecutorFactory.h"
-#include "backend/builtin/Config.h"
-#include "backend/builtin/KernelGenerator.h"
-#include "backend/builtin/TensorBuilder.h"
-#include "backend/builtin/UserTensor.h"
-#include "backend/IPortableTensor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "compiler/Linear.h"
-#include "dumper/text/GraphDumper.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ExecTime.h"
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "ir/OperationCloner.h"
-#include "util/TracingCtx.h"
+#include "Linear.h"
+#include "../backend/builtin/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/UserTensor.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../exec/DataflowExecutor.h"
+#include "../exec/ExecTime.h"
+#include "../exec/ExecutionObservers.h"
+#include "../exec/LinearExecutor.h"
+#include "../exec/ParallelExecutor.h"
+#include "../ir/OperationCloner.h"
+
+#include <backend/IPortableTensor.h>
+#include <compiler/BackendManager.h>
+#include <compiler/ExecutionBuilder.h>
+#include <util/TracingCtx.h>
#include <functional>
#include <memory>
@@ -242,16 +241,17 @@ ExecutorFactory::ExecutorFactory()
{
_map["Linear"] = createLinearExecutor;
_map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, false);
+ std::placeholders::_3, std::placeholders::_4, false);
_map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, true);
+ std::placeholders::_3, std::placeholders::_4, true);
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ const std::shared_ptr<exec::Executors> &executors)
{
- return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+ return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors);
}
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
@@ -282,7 +282,7 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
}
void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const std::shared_ptr<exec::Executors> &executors,
const backend::BackendContexts &backend_contexts)
{
for (auto &pair : backend_contexts)
@@ -292,7 +292,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
{
auto builtin_kernel_gen = builtin_context->kernel_gen;
builtin_kernel_gen->setTensorRegistries(tensor_regs);
- builtin_kernel_gen->setExecutorMap(executor_map);
+ builtin_kernel_gen->setExecutors(executors);
}
}
}
@@ -317,12 +317,11 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
return ordered_contexts;
}
-exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+exec::IExecutor *ExecutorFactory::createLinearExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)
{
- auto graph = lowered_graph->graph();
+ auto &graph = lowered_graph->graph();
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -346,7 +345,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
ExecutionBuilder builder;
@@ -426,14 +425,17 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
auto code_map = builder.releaseCodeMap();
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order,
- options.tracing_ctx};
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph),
+ std::move(backend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
@@ -441,8 +443,9 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors,
+ bool parallel)
{
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -462,7 +465,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
ExecutionBuilder builder;
@@ -491,13 +494,13 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (parallel)
{
exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
- tensor_regs, std::move(code_map), options.tracing_ctx};
+ tensor_regs, std::move(code_map), tracing_ctx};
}
else
{
auto dataflow_exec =
new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
- std::move(code_map), options.tracing_ctx};
+ std::move(code_map), tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
@@ -515,8 +518,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 2ee05fae3..70c089f8c 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,7 +21,7 @@
#include "backend/ITensor.h"
#include "compiler/LoweredGraph.h"
-#include "exec/IExecutor.h"
+#include "exec/Executors.h"
#include <deque>
#include <unordered_map>
@@ -38,8 +38,9 @@ public:
public:
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::Executors> &executors);
private:
ExecutorFactory();
@@ -48,25 +49,26 @@ private:
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts);
static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const std::shared_ptr<exec::Executors> &executors,
const backend::BackendContexts &backend_contexts);
static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
orderBackendContext(const backend::BackendContexts &backend_contexts);
- static exec::IExecutor *
- createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ static exec::IExecutor *createLinearExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors);
static exec::IExecutor *
createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
+ const std::shared_ptr<exec::Executors> &executors, bool parallel);
private:
- std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ std::unordered_map<
+ std::string,
+ std::function<exec::IExecutor *(
+ std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>>
_map;
};
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c1cef1ab..98dc906e4 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -180,7 +180,7 @@ void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
// For now, the only acl_cl supports fully fp16 type
@@ -375,7 +375,7 @@ void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
// For now, the only acl_cl supports fully fp16
if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
@@ -515,7 +515,7 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o
void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
@@ -527,7 +527,7 @@ void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_s
void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OpSequenceIndex &new_op_seq_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
@@ -635,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex
const ir::OperationIndex &node_index)
{
auto &node = _lowered_graph.graph().operations().at(node_index);
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto layout = lower_info->layout();
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index 2f996c8e8..c4bfddb8f 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -14,17 +14,14 @@
* limitations under the License.
*/
-#include "ir/Operand.h"
-#include "compiler/HEScheduler.h"
-#include "ir/Graph.h"
-#include "util/ConfigSource.h"
+#include "HEScheduler.h"
+
#include "compiler/BackendResolver.h"
+#include "ir/Graph.h"
#include "util/logging.h"
-#include "util/Utils.h"
-#include "exec/FunctionSequence.h"
+
#include <cassert>
#include <cmath>
-#include <chrono>
namespace
{
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index 1a95b9881..18ea388fd 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -23,14 +23,16 @@
#ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_
#define __ONERT_COMPILER_H_E_SCHEDULER_H_
-#include "compiler/IScheduler.h"
-#include "compiler/BackendManager.h"
-#include "compiler/Compiler.h"
-#include "ir/Graph.h"
-#include "exec/ExecTime.h"
-#include "backend/Backend.h"
-#include <memory>
-#include "ir/OperationIndexMap.h"
+#include "IScheduler.h"
+#include "../backend/builtin/Config.h"
+#include "../exec/ExecTime.h"
+
+#include <backend/Backend.h>
+#include <compiler/BackendManager.h>
+#include <compiler/Compiler.h>
+#include <ir/Graph.h>
+#include <ir/OperationIndexMap.h>
+
#include <map>
#include <memory>
diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc
new file mode 100644
index 000000000..c4a2df025
--- /dev/null
+++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc
@@ -0,0 +1,572 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HEScheduler.h"
+#include "../exec/ExecTime.h"
+
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+#include <ir/operation/BinaryArithmetic.h>
+#include <ir/operation/FullyConnected.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace operation;
+using namespace exec;
+
+//
+// Mock backends classes
+//
+
+struct MockConfigCPU : public IConfig
+{
+ std::string id() override { return "cpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+class MockBackendContext : public BackendContext
+{
+public:
+ using BackendContext::BackendContext;
+ ITensorRegistry *genTensors() override { return nullptr; }
+ FunctionMap genKernels() override { return {}; }
+};
+
+struct MockBackendCPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigGPU : public IConfig
+{
+ std::string id() override { return "gpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendGPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigNPU : public IConfig
+{
+ std::string id() override { return "npu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendNPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+//
+// Constants
+//
+
+const int OPERAND_ELEMS = 268203;
+const int OPERAND_SIZE = OPERAND_ELEMS * 4;
+const int OPERATION_SIZE = OPERAND_SIZE * 3;
+
+const std::string LINEAR("Linear");
+const std::string DATAFLOW("Dataflow");
+const std::string PARALLEL("Parallel");
+
+//
+// Helper functions
+//
+
+// Set executor through environment variable
+void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
+
+// Set profiling mode through environment variable
+void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
+
+// Calculate operation size by addition sizes of all input and output operands
+uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
+{
+ uint32_t size = 0;
+ const auto &op = graph->operations().at(op_idx);
+ for (const auto &ind : op.getInputs() + op.getOutputs())
+ size += graph->operands().at(ind).info().total_size();
+ return size;
+}
+
+// Set execution operation time. This method is needed since ExecutionTime has only
+// 'updateOperationExecTime' method.
+void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
+ assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
+}
+
+// Set same execution time for all given backends/operations
+void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
+ const std::vector<std::string> &op_names,
+ const std::vector<uint32_t> &op_sizes, int64_t exec_time)
+{
+ assert(op_names.size() == op_sizes.size());
+ ExecTime et(backends);
+ for (int i = 0; i < op_names.size(); ++i)
+ {
+ for (auto &backend : backends)
+ setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
+ }
+ et.storeOperationsExecTime();
+}
+
+// Set permute time from one backend to another. This method is needed since ExecutionTime has only
+// 'updatePermuteTime' method.
+void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
+ assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
+}
+
+// Set same permutation time between all given backends
+void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
+ const int operand_size, const int64_t exec_time)
+{
+ ExecTime et(backends);
+ for (const auto &backend : backends)
+ {
+ for (auto &other_backend : backends)
+ {
+ if (backend == other_backend)
+ continue;
+ setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
+ }
+ }
+ et.storeOperationsExecTime();
+}
+
+//
+// Functions for creating graphs
+//
+
+using OIS = OperandIndexSequence;
+
+template <typename NodeT, typename... Types>
+OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
+{
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
+ auto op_idx = graph->addOperation(std::move(op));
+ // For now in scheduler test all operations in tested graphs has same size (for simplicity)
+ assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
+ return op_idx;
+}
+
+// Create straight graph: Add->Sub->Mul
+std::shared_ptr<Graph> createStraightGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create sub node
+ auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ // Create mul node
+ auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+/* Create branched graph:
+ * [Add]
+ * // \\
+ * [Mul1] [FC2]
+ * || ||
+ * [Mul2] [FC2]
+ * \\ //
+ * [Sub]
+ */
+std::shared_ptr<Graph> createBranchedGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create mul1 node
+ auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
+
+ // Create mul2 node
+ auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
+
+ // Create fc1 node
+ auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
+
+ // Create fc2 node
+ auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
+
+ // Create sub node
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+//
+// Tests setup/teardown
+//
+
+// SetUp/TearDown methods runs before/after each test and performs actions common for each test
+class HESchedulerTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ // Initialize mock backends
+ _cpu_backend = new MockBackendCPU();
+ _gpu_backend = new MockBackendGPU();
+ _npu_backend = new MockBackendNPU();
+ _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
+
+ // Remove previous profile data if it exists
+ if (!remove("exec_time.json"))
+ {
+ // DO NOTHING (no profile data)
+ }
+
+ // Remember original value of 'EXECUTOR' environment variable
+ char *executor = std::getenv("EXECUTOR");
+ _original_executor = executor == nullptr ? "" : executor;
+
+ // Remember original value of 'PROFILING_MODE' environment variable
+ char *profiling_mode = std::getenv("PROFILING_MODE");
+ _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
+ }
+
+ void TearDown() override
+ {
+ delete _cpu_backend;
+ delete _gpu_backend;
+ delete _npu_backend;
+ EXPECT_EQ(remove("exec_time.json"), 0);
+ setenv("EXECUTOR", _original_executor.c_str(), true);
+ setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
+ }
+
+ const MockBackendCPU *_cpu_backend{nullptr};
+ const MockBackendGPU *_gpu_backend{nullptr};
+ const MockBackendNPU *_npu_backend{nullptr};
+ std::vector<const Backend *> _mock_backends;
+
+ std::string _original_executor;
+ std::string _original_profiling_mode;
+};
+
+//
+// HEScheduler tests
+//
+
+class HESchedulerTestWithExecutorParam : public HESchedulerTest,
+ public testing::WithParamInterface<std::string>
+{
+};
+
+// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
+// one time for each executor
+INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
+
+// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+{
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createStraightGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns different backend to each node
+ {
+ // For each backend reduce execution time of one node
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
+ {
+ // Increase transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
+ }
+}
+
+// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
+TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+{
+ const int64_t NPU_ET = 5000;
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
+ // nodes, in case of parallel executor scheduler assigns different backends to branches.
+ {
+ // Reduce execution time
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+
+ std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
+ if (GetParam() == PARALLEL)
+ {
+ branch1_expected_backend =
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
+ }
+
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes
+ {
+ // Increase execution time for GPU backend
+ ExecTime et(_mock_backends);
+ /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
+ * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
+ * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
+ * branching or scheduler assigns another backend to a node*/
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+}
+
+// Test scheduler behavior for branched graph and enabled profiling mode
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
+{
+ const int ET = 1e5;
+
+ // Turn on profiling mode
+ setProfilingMode(true);
+ setExecutor(DATAFLOW);
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
+ {
+ // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
+ // neighbor nodes
+ {
+ // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(mul1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(fc1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
+ br->getBackend(mul2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
+ br->getBackend(fc2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ }
+}
+
+// TODO: Add tests with unknown execution and permutation time
+
+} // unnamed namespace
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 73ba96238..f85b8d1bd 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -14,15 +14,13 @@
* limitations under the License.
*/
-#include <algorithm>
-#include <sstream>
-
#include "Linear.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
+#include "../dumper/text/GraphDumper.h"
+
#include "util/logging.h"
-#include "dumper/text/GraphDumper.h"
+
+#include <sstream>
namespace onert
{
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 999bffa7c..9e84753a7 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -16,24 +16,23 @@
#include "compiler/LoweredGraph.h"
-#include <assert.h>
-#include <algorithm>
-#include <sstream>
-#include "util/logging.h"
-#include "compiler/pass/ConstantInsertionPass.h"
-#include "compiler/pass/ConstantLoweringPass.h"
-#include "compiler/pass/PassRunner.h"
-#include "compiler/pass/PermutationOperationPass.h"
-#include "compiler/pass/PermutationInsertionPass.h"
-#include "compiler/pass/PermutationEliminationPass.h"
-#include "dumper/text/GraphDumper.h"
-#include "ir/verifier/Verifier.h"
+#include "HEScheduler.h"
+#include "ManualScheduler.h"
+#include "pass/ConstantInsertionPass.h"
+#include "pass/ConstantLoweringPass.h"
+#include "pass/PassRunner.h"
+#include "pass/PermutationEliminationPass.h"
+#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationOperationPass.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../ir/verifier/Verifier.h"
+
#include "backend/Backend.h"
-#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
-#include "util/TracingCtx.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
namespace onert
{
@@ -42,7 +41,7 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
- lowerGraph(graph, options);
+ lowerGraph(options);
}
// TODO Design better class and constructor to represent parent_graph
@@ -50,18 +49,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph
const CompilerOptions &options)
: _graph{graph}, _parent_graph{parent_graph}
{
- lowerGraph(graph, options);
+ lowerGraph(options);
}
-void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options)
+void LoweredGraph::lowerGraph(const CompilerOptions &options)
{
- // set tracing_ctx for copied graph
- if (options.tracing_ctx)
- {
- auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
- options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
- }
-
// Build backend contexts
auto &backend_manager = BackendManager::get();
// Create contexts for other backends
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
index 1c7000986..8c6421744 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.cc
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -34,77 +34,72 @@ namespace onert
namespace compiler
{
-ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
-{
-}
+ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {}
void ShapeValidator::checkUnaryOp(const ir::Operation &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
// Check if I/O shapes match
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::operator()()
{
- // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
- // creating Compiler
- assert(_graph.subgraphs() == nullptr);
-
- _current_layout = _graph.layout();
-
_graph.operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
{
+ const auto &operands = _graph.operands();
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
const auto out_index{node.getOutputs().at(0)};
- if (_ctx.at(out_index).info().isDynamic())
+ if (operands.at(out_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2);
}
void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
// All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
if (node.getInputs().size() != 2)
{
const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
- OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2));
- OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().dim(0) ==
+ (operands.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2);
}
OP_REQUIRES(input_shape.C == output_shape.C);
@@ -112,8 +107,9 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
@@ -125,16 +121,16 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
// const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(weight_scales_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(weight_binary_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(ifm_index).shape().dim(1) == _ctx.at(ofm_index).shape().dim(1));
+ OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1));
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(0) > 0);
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2);
// more shape validation will be done inside kernel.
@@ -143,8 +139,9 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
void ShapeValidator::visit(const ir::operation::BCQGather &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
@@ -153,13 +150,14 @@ void ShapeValidator::visit(const ir::operation::BCQGather &node)
const auto input_clusters_index{
node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
- OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more
- OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(input_scales_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(indices_index).shape().rank() <=
+ 2); // TODO : support rank up to 4 or more
+ OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(0) > 0);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2);
// more shape validation will be done inside kernel.
}
@@ -171,62 +169,67 @@ void ShapeValidator::visit(const ir::operation::Comparison &)
void ShapeValidator::visit(const ir::operation::Softmax &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::InstanceNorm &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape());
+ OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(beta_index).shape().rank() == 1);
}
void ShapeValidator::visit(const ir::operation::Pool2D &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
}
void ShapeValidator::visit(const ir::operation::Permute &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Reduce &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
+ const auto input_shape = operands.at(input_index).shape();
+ const auto output_shape = operands.at(output_index).shape();
OP_REQUIRES(input_shape.rank() <= 4);
OP_REQUIRES(output_shape.rank() <= input_shape.rank());
@@ -266,18 +269,20 @@ void ShapeValidator::visit(const ir::operation::Reduce &node)
void ShapeValidator::visit(const ir::operation::Transpose &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
- OP_REQUIRES(_ctx.at(perm_index).shape().num_elements() == 0 ||
- input_shape.rank() == static_cast<int>(_ctx.at(perm_index).shape().num_elements()));
+ OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 ||
+ input_shape.rank() ==
+ static_cast<int>(operands.at(perm_index).shape().num_elements()));
OP_REQUIRES(input_shape.rank() == output_shape.rank());
}
@@ -285,8 +290,9 @@ void ShapeValidator::visit(const ir::operation::RNN &node)
{
// NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
// TODO Support dynamic rnn
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto hidden_state_out_index{
@@ -299,35 +305,36 @@ void ShapeValidator::visit(const ir::operation::RNN &node)
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
-
- OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
+ const auto batch_size = operands.at(output_index).shape().dim(0);
+ const auto num_units = operands.at(output_index).shape().dim(1);
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 2 &&
+ operands.at(hidden_state_out_index).shape().rank() == 2 &&
+ operands.at(input_index).shape().rank() == 2 &&
+ operands.at(weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_weights_index).shape().rank() == 2 &&
+ operands.at(hidden_state_in_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(bias_index).shape().rank() == 1);
+
+ OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_out_index).shape().dim(0));
+ OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1));
+
+ OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == operands.at(bias_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(output_index).shape().dim(1) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_out_index).shape().dim(1));
}
void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
@@ -335,39 +342,40 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
// All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2);
OP_REQUIRES(input_shape.C == output_shape.C);
}
void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
const auto block_size = node.param().block_size;
// All assertions as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0));
OP_REQUIRES(input_shape.N == output_shape.N);
OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
@@ -382,29 +390,31 @@ void ShapeValidator::visit(const ir::operation::ElementwiseBinary &)
void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &values_obj = operands.at(values_index);
// Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
// TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
{
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto &output_shape = output_obj.shape();
@@ -427,26 +437,28 @@ void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
void ShapeValidator::visit(const ir::operation::ExpandDims &node)
{
+ const auto &operands = _graph.operands();
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- if (_ctx.at(axis_index).info().isDynamic())
+ if (operands.at(axis_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
+ OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1);
}
void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &keys_obj = operands.at(keys_index);
+ const auto &values_obj = operands.at(values_index);
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto &output_shape = output_obj.shape();
@@ -464,28 +476,30 @@ void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
void ShapeValidator::visit(const ir::operation::TransposeConv &node)
{
// shape check
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
// Only 4D tensors are supported
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank());
- const auto frontend_layout = _current_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
// The kernel has only IHWO layout on frontend
// So ker_shape is treated here below
// I -> N
// H -> H
// W -> W
// O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+ const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC);
OP_REQUIRES(ifm_shape.N == ofm_shape.N);
OP_REQUIRES(ifm_shape.C == ker_shape.C);
@@ -494,16 +508,17 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node)
void ShapeValidator::visit(const ir::operation::Gather &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
+ const auto ifm_shape = operands.at(ifm_index).shape();
+ const auto indices_shape = operands.at(indices_index).shape();
+ const auto ofm_shape = operands.at(ofm_index).shape();
OP_REQUIRES(ifm_shape.rank() <= 4);
OP_REQUIRES(indices_shape.rank() <= 3);
@@ -512,21 +527,22 @@ void ShapeValidator::visit(const ir::operation::Gather &node)
void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
{
+ const auto &operands = _graph.operands();
int32_t block_size = node.param().block_size;
// shape check
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
- const auto frontend_layout = _current_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
{
OP_REQUIRES(output_shape.N == input_shape.N);
@@ -539,22 +555,23 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
void ShapeValidator::visit(const ir::operation::Pack &node)
{
+ const auto &operands = _graph.operands();
const auto axis{node.param().axis};
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
// shape check
- const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
const auto output_rank = static_cast<int32_t>(output_shape.rank());
const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
+ const auto input_shape = operands.at(input1_index).shape();
OP_REQUIRES(axis >= -output_rank && axis < output_rank);
for (const auto &index : node.getInputs())
{
- OP_REQUIRES(input_shape == _ctx.at(index).shape());
+ OP_REQUIRES(input_shape == operands.at(index).shape());
}
}
@@ -562,8 +579,9 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
{
// NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
// TODO Support dynamic rnn
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto scratch_buffer_index{
@@ -611,91 +629,96 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
- for (int i = 0; i < _ctx.at(input_index).shape().rank() - 1; ++i)
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+ for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i)
{
- OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i));
+ OP_REQUIRES(operands.at(input_index).shape().dim(i) ==
+ operands.at(output_index).shape().dim(i));
}
- OP_REQUIRES(
- (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
- (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
- (!_ctx.exist(input_to_input_weights_index) ||
- _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(projection_weights_index) ||
- _ctx.at(projection_weights_index).shape().rank() == 2) &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- OP_REQUIRES(
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
+ OP_REQUIRES((operands.at(output_index).shape().rank() == 2 ||
+ operands.at(output_index).shape().rank() == 3) &&
+ (operands.at(input_index).shape().rank() == 2 ||
+ operands.at(input_index).shape().rank() == 3) &&
+ (!operands.exist(input_to_input_weights_index) ||
+ operands.at(input_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ operands.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(projection_weights_index) ||
+ operands.at(projection_weights_index).shape().rank() == 2) &&
+ operands.at(output_state_in_index).shape().rank() == 2 &&
+ operands.at(cell_state_in_index).shape().rank() == 2);
+
+ OP_REQUIRES((!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().rank() == 1) &&
+ operands.at(forget_gate_bias_index).shape().rank() == 1 &&
+ operands.at(cell_bias_index).shape().rank() == 1 &&
+ operands.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!operands.exist(projection_bias_index) ||
+ operands.at(projection_bias_index).shape().rank() == 1));
// CIFG assertion
- OP_REQUIRES(
- ((!_ctx.exist(input_to_input_weights_index) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
+ OP_REQUIRES(((!operands.exist(input_to_input_weights_index) ||
+ (operands.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(input_gate_bias_index) &&
+ operands.at(input_gate_bias_index).shape().dim(0) != 0)));
// Peephole assertion
- OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(cell_to_forget_weights_index) &&
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
- (_ctx.exist(cell_to_output_weights_index) &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0)));
-
- bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
+ OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
+ (operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0)));
+
+ bool has_input_to_input_weights =
+ operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.exist(cell_to_input_weights_index) &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.exist(projection_weights_index) &&
- (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0);
+ operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) &&
+ operands.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.exist(projection_weights_index) &&
+ (operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
+ operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
@@ -710,46 +733,48 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
// NOTE The projection weights may have data but the projection bias may not.
bool has_projection_param = has_projection_weights;
- const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major)
- ? _ctx.at(input_index).shape().dim(1)
- : _ctx.at(input_index).shape().dim(0);
- OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(_ctx.at(input_index).shape().rank() - 1);
- OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(input_to_output_weights_index).shape().dim(0);
- OP_REQUIRES(num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1));
+ const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major)
+ ? operands.at(input_index).shape().dim(1)
+ : operands.at(input_index).shape().dim(0);
+ OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(cell_state_in_index).shape().dim(0));
+
+ const auto input_size =
+ operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1);
+ OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0);
+ OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_bias_index).shape().dim(0) &&
+ num_units == operands.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_state_in_index).shape().dim(1));
const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1));
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == operands.at(output_state_in_index).shape().dim(1));
if (has_cifg_param)
{
- OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- ((_ctx.exist(cell_to_input_weights_index) &&
- num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0)) ||
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(
+ num_units == operands.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ ((operands.exist(cell_to_input_weights_index) &&
+ num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) ||
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
+ num_units == operands.at(input_gate_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1));
OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
has_input_gate_bias);
if (has_cell_to_input_weights)
@@ -757,64 +782,65 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
// NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
OP_REQUIRES(has_peephole_param);
}
- if (_ctx.exist(scratch_buffer_index))
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
}
else
{
- if (_ctx.exist(scratch_buffer_index))
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
}
if (has_peephole_param)
{
- OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
}
if (has_projection_param)
{
- OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1));
+ OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0));
if (has_projection_bias)
{
- OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0));
}
}
- if (_ctx.exist(scratch_buffer_index))
+ if (operands.exist(scratch_buffer_index))
{
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(scratch_buffer_index).shape().dim(0));
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0));
}
- if (_ctx.exist(output_state_out_index))
+ if (operands.exist(output_state_out_index))
{
- OP_REQUIRES(_ctx.at(output_state_out_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(output_state_out_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(output_state_out_index).shape().dim(1));
+ OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1));
}
- if (_ctx.exist(cell_state_out_index))
+ if (operands.exist(cell_state_out_index))
{
- OP_REQUIRES(_ctx.at(cell_state_out_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(cell_state_out_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(cell_state_out_index).shape().dim(1));
+ OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1));
}
}
void ShapeValidator::visit(const ir::operation::L2Normalization &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto ifm_shape = operands.at(ifm_index).shape();
+ auto ofm_shape = operands.at(ofm_index).shape();
OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
@@ -826,14 +852,15 @@ void ShapeValidator::visit(const ir::operation::L2Normalization &node)
void ShapeValidator::visit(const ir::operation::Unpack &node)
{
+ const auto &operands = _graph.operands();
const auto axis{node.param().axis};
const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
const auto input_rank = static_cast<int32_t>(input_shape.rank());
OP_REQUIRES(axis >= -input_rank && axis < input_rank);
@@ -841,22 +868,23 @@ void ShapeValidator::visit(const ir::operation::Unpack &node)
void ShapeValidator::visit(const ir::operation::Pad &node)
{
+ const auto &operands = _graph.operands();
const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+ OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32);
const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
+ const auto &pad_shape = operands.at(pad_index).shape();
+ const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank());
OP_REQUIRES(pad_shape.rank() == 2);
OP_REQUIRES(pad_shape.dim(0) == input_rank);
OP_REQUIRES(pad_shape.dim(1) == 2);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Select &)
@@ -866,65 +894,70 @@ void ShapeValidator::visit(const ir::operation::Select &)
void ShapeValidator::visit(const ir::operation::StridedSlice &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() <= 4);
}
void ShapeValidator::visit(const ir::operation::Split &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
const auto num_splits = node.param().num_splits;
- const auto input_rank = _ctx.at(input_index).shape().rank();
- auto axis = *reinterpret_cast<const int32_t *>(_ctx.at(axis_index).data()->base());
+ const auto input_rank = operands.at(input_index).shape().rank();
+ auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base());
axis = axis < 0 ? axis + input_rank : axis;
OP_REQUIRES(axis >= 0 && axis < input_rank);
- OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
+ OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0);
}
void ShapeValidator::visit(const ir::operation::Shape &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 1);
}
void ShapeValidator::visit(const ir::operation::ResizeBilinear &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
{
return;
}
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
}
void ShapeValidator::visit(const ir::operation::Reverse &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::visit(const ir::operation::If &)
@@ -940,17 +973,18 @@ void ShapeValidator::visit(const ir::operation::While &)
void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- auto output_shape = _ctx.at(output_index).shape();
- auto lhs_shape = _ctx.at(lhs_index).shape();
- auto rhs_shape = _ctx.at(rhs_index).shape();
+ auto output_shape = operands.at(output_index).shape();
+ auto lhs_shape = operands.at(lhs_index).shape();
+ auto rhs_shape = operands.at(rhs_index).shape();
// Check for output rank
OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
@@ -982,36 +1016,40 @@ void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
}
void ShapeValidator::visit(const ir::operation::Tile &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
const auto multiple_index{node.getInputs().at(1)};
- OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(multiple_index).shape().dim(0) ==
+ operands.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Range &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(start_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(limit_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(delta_index).shape().rank() == 0);
}
void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
const auto num_lower_index{
@@ -1020,23 +1058,24 @@ void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
- OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
- OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
+ OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
+ OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
+ OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
}
void ShapeValidator::visit(const ir::operation::LogSoftmax &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
index 763cf7ce3..a51e8adc0 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.h
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -39,8 +39,13 @@ class ShapeValidator : public ir::OperationVisitor
public:
ShapeValidator(void) = delete;
ShapeValidator(const ir::Graph &graph);
+ ShapeValidator(const ShapeValidator &) = delete;
+ ShapeValidator(ShapeValidator &&) = delete;
+ ~ShapeValidator() = default;
public:
+ ShapeValidator &operator=(const ShapeValidator &) = delete;
+ ShapeValidator &operator=(ShapeValidator &&) = delete;
void operator()();
public:
@@ -90,10 +95,7 @@ private:
void checkUnaryOp(const ir::Operation &node);
private:
- // TODO Remove _ctx field
const ir::Graph &_graph;
- const ir::Operands &_ctx;
- ir::Layout _current_layout;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
index f2fee2c3c..485450560 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -19,62 +19,90 @@
#include "util/logging.h"
#include <sstream>
+#include <stdexcept>
namespace onert
{
namespace compiler
{
-
-void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind)
+void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable)
{
- StaticShapeInferer inferer(subg_ind, _lowered_subgs);
- auto &lgraph = _lowered_subgs.at(subg_ind);
- for (auto op_ind : lgraph->graph().topolSortOperations())
+ assert(changed_operands_info.size() == _operands.size());
+ for (size_t i = 0; i < changed_operands_info.size(); ++i)
{
- auto &op = lgraph->graph().operations().at(op_ind);
- bool has_dynamic_tensor = inferer.infer(op);
- lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ const auto &changed_operand_info = changed_operands_info.at(i);
+ auto &operand = _operands.at(i);
+ // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+ // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+    // This error check may be replaced by an assertion if this function is called after the
+    // validation of models is completed.
+ if (changed_operand_info.typeInfo() != operand->typeInfo())
+ {
+ throw std::runtime_error("OperandObserver: The types of operands are mismatched");
+ }
+ if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable))
+ {
+ operand->info().setDynamic();
+ }
+ else
+ {
+ const auto &new_shape = changed_operands_info.at(i).shape();
+ operand->info().shape(new_shape);
+ }
}
}
-bool StaticShapeInferer::infer(const ir::Operation &op)
+void StaticShapeInferer::infer()
{
- bool has_dynamic_tensor = false;
-
- auto opcode = op.opcode();
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
- {
- op.accept(*this);
- }
- else
+ for (const auto &op_idx : _lowered_subg->graph().topolSortOperations())
{
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
+ const auto &op = _lowered_subg->graph().operations().at(op_idx);
+ bool has_dynamic_tensor = false;
+ const auto opcode = op.opcode();
+ // IF: requires shape inference for then, else
+ // While: requires shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
{
- setDynamicOutput(op);
+ op.accept(*this);
}
else
{
- op.accept(*this);
+ has_dynamic_tensor = checkDynamicInput(op);
+ if (has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
}
+ has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op);
+ _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor);
}
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
-
- return has_dynamic_tensor;
+ if (_controlflow_output_observer != nullptr)
+ {
+    // re-sizing output shapes of the controlflow operation branching to this subgraph
+ std::vector<ir::OperandInfo> outputs_info;
+ const auto &graph = _lowered_subg->graph();
+ const auto &outputs = graph.getOutputs();
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(outputs.at(i)).info();
+ outputs_info.emplace_back(operand_info);
+ }
+ _controlflow_output_observer->updateShapes(outputs_info);
+ }
}
bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
{
+ const auto &operands = _lowered_subg->graph().operands();
for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
{
- if (_operands.at(input_idx).info().isDynamic())
+ if (operands.at(input_idx).info().isDynamic())
{
return true;
}
@@ -83,11 +111,25 @@ bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
return false;
}
+bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ if (operands.at(output_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
{
+ auto &operands = _lowered_subg->graph().operands();
for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
{
- _operands.at(output_idx).info().setDynamic();
+ operands.at(output_idx).info().setDynamic();
}
}
@@ -95,11 +137,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
{
- const auto &lhs = _operands.at(lhs_idx);
- const auto &rhs = _operands.at(rhs_idx);
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &lhs = operands.at(lhs_idx);
+ const auto &rhs = operands.at(rhs_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
@@ -109,11 +152,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
const ir::OperandIndex input_idx)
{
- const auto &input = _operands.at(input_idx);
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape = input.info().shape();
@@ -136,36 +180,31 @@ void StaticShapeInferer::dump()
return sstream.str();
};
- for (const auto &pair : _lowered_subgs)
- {
- const auto index = pair.first;
- const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << index << std::endl;
- lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer)
- << " " << ind << ", " << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
- }
+ _lowered_subg->graph().operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer) << " " << ind << ", "
+ << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
}
void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!axis.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -181,27 +220,31 @@ void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
const auto output_index = op.getOutputs().at(0);
- const auto &lhs = _operands.at(lhs_index);
- const auto &rhs = _operands.at(rhs_index);
- auto &output = _operands.at(output_index);
+ const auto &lhs = operands.at(lhs_index);
+ const auto &rhs = operands.at(rhs_index);
+ auto &output = operands.at(output_index);
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto cluster_idx{
op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
- const auto &cluster = _operands.at(cluster_idx);
+ const auto &cluster = operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
assert(cluster_buf);
@@ -214,17 +257,19 @@ void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
void StaticShapeInferer::visit(const ir::operation::BCQGather &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
+ const auto &indices = operands.at(indices_idx);
const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
- const auto &input_binary = _operands.at(input_binary_idx);
+ const auto &input_binary = operands.at(input_binary_idx);
const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
- const auto &cluster = _operands.at(cluster_idx);
+ const auto &cluster = operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
assert(cluster_buf);
@@ -247,16 +292,16 @@ void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
{
// get mutable output operand
+ auto &operands = _lowered_subg->graph().operands();
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
if (!shape.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -276,16 +321,18 @@ void StaticShapeInferer::visit(const ir::operation::Comparison &op)
void StaticShapeInferer::visit(const ir::operation::Concat &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_count = op.getInputs().size();
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
shape_inference::Shapes input_shapes;
for (uint32_t i = 0; i < input_count; i++)
{
const auto input_idx{op.getInputs().at(i)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
input_shapes.emplace_back(input.shape());
}
@@ -297,12 +344,14 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
- const auto &ker = _operands.at(ker_idx);
+ const auto &ker = operands.at(ker_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
@@ -328,17 +377,18 @@ void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!axis.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -360,15 +410,16 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
void StaticShapeInferer::visit(const ir::operation::Fill &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!shape.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -390,15 +441,17 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
- const auto &ker = _operands.at(ker_idx);
+ const auto &ker = operands.at(ker_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -412,15 +465,17 @@ void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
void StaticShapeInferer::visit(const ir::operation::Gather &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
+ const auto &indices = operands.at(indices_idx);
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -434,70 +489,21 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)
void StaticShapeInferer::visit(const ir::operation::If &op)
{
- auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph();
- auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph();
+ // re-sizing input shapes of then/else subgraph
const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
- const auto &outputs = op.getOutputs();
- // re-sizing input shapes of then subgraph
- const auto &then_inputs = then_graph.getInputs();
- assert(inputs.size() == then_inputs.size());
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
for (size_t i = 0; i < inputs.size(); ++i)
{
- auto &then_input = then_graph.operands().at(then_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- then_input.info().setDynamic();
- }
- else
- {
- auto new_shape = _operands.at(inputs.at(i)).info().shape();
- then_input.info().shape(new_shape);
- }
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
}
+ _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().then_subg_index)->infer();
- // re-sizing input shapes of else subgraph
- const auto &else_inputs = else_graph.getInputs();
- assert(inputs.size() == else_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &else_input = else_graph.operands().at(else_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- else_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = _operands.at(inputs.at(i)).info().shape();
- else_input.info().shape(new_shape);
- }
- }
-
- inferSubgraph(op.param().then_subg_index);
- inferSubgraph(op.param().else_subg_index);
-
- // re-sizing output shapes
- // TODO use then_graph / else_graph instead
- const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
- const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
- assert(outputs.size() == then_outputs.size());
- assert(outputs.size() == else_outputs.size());
- for (size_t i = 0; i < outputs.size(); ++i)
- {
- const auto &then_output = then_graph.operands().at(then_outputs.at(i));
- const auto &else_output = else_graph.operands().at(else_outputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (!then_output.info().isDynamic() && !else_output.info().isDynamic() &&
- then_output.shape() == else_output.shape())
- {
- output.info().shape(then_output.shape());
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
+ _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().else_subg_index)->infer();
}
void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
@@ -507,8 +513,10 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
void StaticShapeInferer::visit(const ir::operation::LSTM &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- auto &output = _operands.at(output_index);
+ auto &output = operands.at(output_index);
const auto output_state_out_index{
op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
@@ -518,24 +526,24 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
if (output.info().isDynamic() ||
- (_operands.exist(output_state_out_index) &&
- _operands.at(output_state_out_index).info().isDynamic()) ||
- (_operands.exist(cell_state_out_index) &&
- _operands.at(cell_state_out_index).info().isDynamic()) ||
- (_operands.exist(scratch_buffer_index) &&
- _operands.at(scratch_buffer_index).info().isDynamic()))
+ (operands.exist(output_state_out_index) &&
+ operands.at(output_state_out_index).info().isDynamic()) ||
+ (operands.exist(cell_state_out_index) &&
+ operands.at(cell_state_out_index).info().isDynamic()) ||
+ (operands.exist(scratch_buffer_index) &&
+ operands.at(scratch_buffer_index).info().isDynamic()))
return;
const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto input_to_output_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto &input_to_output_weights = _operands.at(input_to_output_weights_index);
+ const auto &input_to_output_weights = operands.at(input_to_output_weights_index);
const auto recurrent_to_output_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index);
+ const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index);
// re-sizing outputs
const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1)
@@ -555,21 +563,21 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
output.info().shape(ir::Shape{n_batch, n_output});
}
- if (_operands.exist(output_state_out_index))
+ if (operands.exist(output_state_out_index))
{
- auto &output_state_out = _operands.at(output_state_out_index);
+ auto &output_state_out = operands.at(output_state_out_index);
output_state_out.info().shape(ir::Shape{n_batch, n_output});
}
- if (_operands.exist(cell_state_out_index))
+ if (operands.exist(cell_state_out_index))
{
- auto &cell_state_out = _operands.at(cell_state_out_index);
+ auto &cell_state_out = operands.at(cell_state_out_index);
cell_state_out.info().shape(ir::Shape{n_batch, n_cell});
}
- if (_operands.exist(scratch_buffer_index))
+ if (operands.exist(scratch_buffer_index))
{
- auto &scratch_buffer = _operands.at(scratch_buffer_index);
+ auto &scratch_buffer = operands.at(scratch_buffer_index);
const auto input_to_input_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
@@ -577,11 +585,11 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
bool has_input_to_input_weights =
- _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
@@ -605,20 +613,21 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
void StaticShapeInferer::visit(const ir::operation::OneHot &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
- const auto &indice = _operands.at(indice_idx);
+ const auto &indice = operands.at(indice_idx);
const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
- const auto &depth = _operands.at(depth_idx);
+ const auto &depth = operands.at(depth_idx);
const auto axis = op.param().axis;
auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!depth.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -631,12 +640,14 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
void StaticShapeInferer::visit(const ir::operation::Pack &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto rank = input.shape().rank() + 1;
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -651,21 +662,22 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op)
void StaticShapeInferer::visit(const ir::operation::Pad &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto &pad = _operands.at(pad_idx);
+ const auto &pad = operands.at(pad_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// if pad is not constant, output also becomes dynamic
if (!pad.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -678,10 +690,12 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op)
void StaticShapeInferer::visit(const ir::operation::Permute &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
// Permute is a special operation that layouts of input/output may be different on backend
@@ -700,16 +714,18 @@ void StaticShapeInferer::visit(const ir::operation::Pow &op)
void StaticShapeInferer::visit(const ir::operation::Range &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
- const auto &start_op = _operands.at(start_idx);
- const auto &limit_op = _operands.at(limit_idx);
- const auto &delta_op = _operands.at(delta_idx);
+ const auto &start_op = operands.at(start_idx);
+ const auto &limit_op = operands.at(limit_idx);
+ const auto &delta_op = operands.at(delta_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
ir::Shape new_shape;
if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
@@ -731,21 +747,22 @@ void StaticShapeInferer::visit(const ir::operation::Range &op)
else
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
}
}
void StaticShapeInferer::visit(const ir::operation::Reduce &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
- const auto &axes = _operands.at(axes_idx);
+ const auto &axes = operands.at(axes_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
std::vector<int32_t> axes_vec;
for (size_t i = 0; i < axes.shape().num_elements(); ++i)
@@ -777,19 +794,21 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op)
void StaticShapeInferer::visit(const ir::operation::Reshape &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// New shape is given by second input tensor
if (op.getInputs().size() == 2)
{
// Let's check the second input
const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
if (shape.isConstant())
{
@@ -810,7 +829,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
{
// if shape is NOT Const, set output shape to be dynamic_
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
}
}
// New shape is given by option
@@ -835,21 +853,22 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
int32_t height_out, width_out;
if (op.getInputs().size() == 2)
{
- auto &size = _operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
+ auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
if (!size.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
const auto size_v = size.asVector<std::int32_t>();
@@ -881,17 +900,19 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op)
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto &input_cond = _operands.at(input_cond_idx);
+ const auto &input_cond = operands.at(input_cond_idx);
const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto &input_true = _operands.at(input_true_idx);
+ const auto &input_true = operands.at(input_true_idx);
const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- const auto &input_false = _operands.at(input_false_idx);
+ const auto &input_false = operands.at(input_false_idx);
auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// Select output shpae
ir::Shape new_shape = shape_inference::inferSelectShape(
@@ -901,12 +922,14 @@ void StaticShapeInferer::visit(const ir::operation::Select &op)
void StaticShapeInferer::visit(const ir::operation::Shape &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape output_shape;
@@ -917,20 +940,21 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
void StaticShapeInferer::visit(const ir::operation::Slice &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto &begins = _operands.at(begins_index);
+ const auto &begins = operands.at(begins_index);
const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
- const auto &sizes = _operands.at(sizes_index);
+ const auto &sizes = operands.at(sizes_index);
const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
+ ir::Operand &output = operands.at(output_index);
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(begins.isConstant() && sizes.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -959,21 +983,22 @@ void StaticShapeInferer::visit(const ir::operation::Softmax &op)
void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto output_index = op.getOutputs().at(0);
const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- ir::Operand &output = _operands.at(output_index);
- const auto &input = _operands.at(input_idx);
- const auto &block_shape = _operands.at(block_shape_idx);
- const auto &padding = _operands.at(padding_idx);
+ ir::Operand &output = operands.at(output_index);
+ const auto &input = operands.at(input_idx);
+ const auto &block_shape = operands.at(block_shape_idx);
+ const auto &padding = operands.at(padding_idx);
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(block_shape.isConstant() && padding.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -992,21 +1017,22 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
void StaticShapeInferer::visit(const ir::operation::Split &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
auto outputs = op.getOutputs();
if (!axis.isConstant())
{
for (auto output_idx : outputs)
{
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().setDynamic();
}
- _return_has_dynamic_tensor = true;
return;
}
@@ -1022,7 +1048,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op)
shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
for (auto output_idx : outputs)
{
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().shape(new_shape);
}
}
@@ -1035,11 +1061,13 @@ void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// Squeeze output shpae
ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
@@ -1048,21 +1076,22 @@ void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto &starts = _operands.at(starts_index);
+ const auto &starts = operands.at(starts_index);
const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto &ends = _operands.at(ends_index);
+ const auto &ends = operands.at(ends_index);
const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- const auto &strides = _operands.at(strides_index);
+ const auto &strides = operands.at(strides_index);
const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
+ ir::Operand &output = operands.at(output_index);
if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1085,19 +1114,20 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
void StaticShapeInferer::visit(const ir::operation::Tile &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
- const auto &multiplier = _operands.at(multiplier_idx);
+ const auto &multiplier = operands.at(multiplier_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!multiplier.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1112,11 +1142,13 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
void StaticShapeInferer::visit(const ir::operation::Transpose &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- const auto &perm = _operands.at(perm_idx);
+ const auto &perm = operands.at(perm_idx);
// perm.shape() != ir::Shape{0} means that perm is (n-1...0)
// TODO This condition changes to perm.num_elements() == 0
@@ -1124,11 +1156,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- auto &output = _operands.at(output_idx);
+ auto &output = operands.at(output_idx);
if (!perm.isConstant() && !is_regular_transpose)
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1157,8 +1188,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
void StaticShapeInferer::visit(const ir::operation::Unpack &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto num = op.param().num;
const auto rank = input.shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -1169,10 +1202,9 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
{
const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().setDynamic();
}
- _return_has_dynamic_tensor = true;
return;
}
@@ -1182,69 +1214,43 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
{
const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().shape(new_shape);
}
}
void StaticShapeInferer::visit(const ir::operation::While &op)
{
- auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph();
- auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph();
+ auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get();
+ auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get();
+ // re-sizing input shapes of body subgraph
const auto inputs = op.getInputs();
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of then subgraph
- const auto &cond_inputs = cond_graph.getInputs();
- assert(inputs.size() == cond_inputs.size());
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
for (size_t i = 0; i < inputs.size(); ++i)
{
- const auto &input = _operands.at(inputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if (input.info().isDynamic())
- {
- cond_input.info().setDynamic();
- }
- else
- {
- auto new_shape = input.info().shape();
- cond_input.info().shape(new_shape);
- }
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
}
- // re-sizing input shapes of body subgraph
- const auto &body_inputs = body_graph.getInputs();
- assert(cond_inputs.size() == body_inputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &body_input = body_graph.operands().at(body_inputs.at(i));
- if (cond_input.info().isDynamic())
- {
- body_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = cond_input.info().shape();
- body_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of body subgraph
- inferSubgraph(op.param().body_subg_index);
+ body_input_observer->updateShapes(inputs_info);
+ _child_inferers.at(op.param().body_subg_index)->infer();
// Check whether while operation's shapes are predictable
- // If any of shape of body outputs and cond inputs are different, non-constant operands would be
- // set to dynamic
+ // This while op's outputs are also updated by the above call to
+ // "_child_inferers.at(op.param().body_subg_index)->infer()". That means the body's outputs and
+ // this op's outputs must have the same shape. So we can predict whether the body subgraph will
+ // change at every step by comparing the shapes of the inputs and outputs. If any body output
+ // shape differs from the corresponding input shape, non-constant operands are set to dynamic.
bool check_unpredictable_dynamic = false;
- const auto &body_outputs = body_graph.getOutputs();
- assert(body_outputs.size() == cond_inputs.size());
- for (size_t i = 0; i < body_outputs.size(); ++i)
+ const auto &updated_outputs = op.getOutputs();
+ assert(inputs_info.size() == updated_outputs.size());
+ for (size_t i = 0; i < updated_outputs.size(); ++i)
{
- const auto &body_output = body_graph.operands().at(body_outputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) ||
- (cond_input.shape() != body_output.shape()))
+ const auto &input_info = inputs_info.at(i);
+ const auto &output_info = graph.operands().at(updated_outputs.at(i)).info();
+ if (input_info.isDynamic() != output_info.isDynamic() ||
+ input_info.shape() != output_info.shape())
{
check_unpredictable_dynamic = true;
break;
@@ -1253,53 +1259,11 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
if (check_unpredictable_dynamic)
{
- // Set inputs of body subgraph
- for (const auto &input_index : body_inputs)
- {
- auto &input = body_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set inputs of cond subgraph
- for (const auto &input_index : cond_inputs)
- {
- auto &input = cond_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set non-constant operands of body subgraph to dynamic
- inferSubgraph(op.param().body_subg_index);
- }
-
- // re-sizing operands of cond subgraph
- // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
- // dynamic
- inferSubgraph(op.param().cond_subg_index);
-
- // re-sizing outputs of while operation
- // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
- assert(cond_inputs.size() == outputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (cond_input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- else
- {
- const auto new_shape = cond_input.info().shape();
- output.info().shape(new_shape);
- }
+ body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().body_subg_index)->infer();
}
+ cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().cond_subg_index)->infer();
}
void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
@@ -1307,24 +1271,52 @@ void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
// TODO: NMS supports very limited input/output size.
ir::operation::DetectionPostProcess::Param param = op.param();
+ auto &operands = _lowered_subg->graph().operands();
const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
const auto output_idx1 = op.getOutputs().at(0);
- auto &output1 = _operands.at(output_idx1);
+ auto &output1 = operands.at(output_idx1);
output1.info().shape({1, num_detected_boxes, 4});
const auto output_idx2 = op.getOutputs().at(1);
- auto &output2 = _operands.at(output_idx2);
+ auto &output2 = operands.at(output_idx2);
output2.info().shape({1, num_detected_boxes});
const auto output_idx3 = op.getOutputs().at(2);
- auto &output3 = _operands.at(output_idx3);
+ auto &output3 = operands.at(output_idx3);
output3.info().shape({1, num_detected_boxes});
const auto output_idx4 = op.getOutputs().at(3);
- auto &output4 = _operands.at(output_idx4);
+ auto &output4 = operands.at(output_idx4);
output4.info().shape({1});
}
+void StaticShapeInferer::visit(const ir::operation::Bulk &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ // TODO: support multiple inputs/outputs
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cur_input_shape = input.info().shape();
+ auto origin_input_shape = op.param().origin_input_shapes[0];
+ auto cur_output_shape = output.info().shape();
+ auto origin_output_shape = op.param().origin_output_shapes[0];
+
+ // TODO: more check for valid batch request
+ assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0));
+ assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0);
+ size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
+
+ ir::Shape new_shape;
+ new_shape.append(origin_output_shape.dim(0) * batch_multiplier);
+ for (int32_t d = 1; d < origin_output_shape.rank(); ++d)
+ new_shape.append(origin_output_shape.dim(d));
+
+ output.info().shape(new_shape);
+}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index 2a99db781..b3cc0bbe3 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/TensorRegistry.h"
+
#include "backend/Backend.h"
-#include "backend/builtin/Config.h"
-#include "backend/builtin/TensorBuilder.h"
-#include "backend/builtin/TensorRegistry.h"
+#include "backend/BackendContext.h"
+
+#include <memory>
+#include <unordered_set>
namespace onert
{
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index 181f388de..c27ce3d09 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -15,7 +15,6 @@
*/
#include "PermutationEliminationPass.h"
-#include "backend/builtin/Config.h"
#include "util/logging.h"
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index 6f9899114..71efa1bb5 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -17,18 +17,16 @@
#include "PermutationInsertionPass.h"
-#include <cassert>
-#include <utility>
-#include <unordered_map>
+#include "../../backend/builtin/Config.h"
-#include "backend/builtin/Config.h"
-#include "ir/Operand.h"
#include "compiler/OperationLowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
+#include "ir/operation/Permute.h"
#include "util/logging.h"
+
+#include <cassert>
#include <memory>
-#include "ir/operation/Permute.h"
+#include <unordered_map>
+#include <utility>
namespace onert
{
@@ -125,6 +123,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
// backend
auto &model_outputs = _graph.getOutputs();
const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
@@ -141,6 +141,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
+ assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
new file mode 100644
index 000000000..572b4df24
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnusedOperandEliminationPass.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+using namespace onert::compiler::pass;
+
+TEST(UnusedOperandEliminationPass, Simple)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ auto unused = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+
+ UnusedOperandEliminationPass{graph}.run();
+
+ ASSERT_TRUE(graph.operands().exist(in));
+ ASSERT_TRUE(graph.operands().exist(out));
+ ASSERT_FALSE(graph.operands().exist(unused));
+}
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
index 714fb6fda..0bb2fa11f 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.cc
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -19,6 +19,7 @@
#include "DotDumper.h"
#include "DotBuilder.h"
+#include "ir/OperandIndexMap.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
@@ -31,97 +32,72 @@ namespace dumper
namespace dot
{
-void DotDumper::dump(const std::string &tag)
+namespace
{
- if (_level == Level::OFF)
- {
- return;
- }
-
- onert::dumper::dot::DotBuilder dot_builder;
-
- auto &operations = _graph.operations();
- auto &operands = _graph.operands();
-
- ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
- std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
-
- auto backend_to_fillcolor = [](const backend::Backend *backend) {
- static const auto map = []() {
- std::unordered_map<const backend::Backend *, std::string> ret;
- uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
- for (const auto backend : compiler::BackendManager::get().getAll())
- {
- ret.emplace(backend, Node::BG_COLORS[index]);
- index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
- }
- return ret;
- }();
-
- auto itr = map.find(backend);
- if (itr == map.end())
- {
- return Node::DEFAULT_FILLCOLOR;
- }
- else
+std::string backend_to_fillcolor(const backend::Backend *backend)
+{
+ static const auto map = []() {
+ std::unordered_map<const backend::Backend *, std::string> ret;
+ uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
+ for (const auto backend : compiler::BackendManager::get().getAll())
{
- return itr->second;
+ ret.emplace(backend, Node::BG_COLORS[index]);
+ index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
}
- };
+ return ret;
+ }();
+ auto itr = map.find(backend);
+ if (itr == map.end())
+ {
+ return Node::DEFAULT_FILLCOLOR;
+ }
+ else
+ {
+ return itr->second;
+ }
+}
- util::Set<ir::OperandIndex> shown_operand_set;
+std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>>
+generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level)
+{
+ std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands;
+ const auto &operands = graph.operands();
operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
- bool showing_cond = false;
- if (_level == Level::ALL)
- {
- showing_cond = true;
- }
- else
- {
- showing_cond =
- !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
- }
+ bool showing_cond =
+ level == DotDumper::Level::ALL
+ ? true
+ : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index);
if (showing_cond)
{
- shown_operand_set.add(index);
-
auto type = [&]() {
using onert::dumper::dot::Operand;
- if (_graph.getInputs().contains(index))
+ if (graph.getInputs().contains(index))
return Operand::Type::MODEL_INPUT;
- if (_graph.getOutputs().contains(index))
+ if (graph.getOutputs().contains(index))
return Operand::Type::MODEL_OUTPUT;
return Operand::Type::INTERNAL;
}();
auto node = std::make_unique<Operand>(index, type);
+ std::string label = std::to_string(index.value());
+ std::string fillcolor = "";
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
- {
- // Display LowerInfo attributes
- std::string label = std::to_string(index.value());
- std::string fillcolor = "";
- if (_lowered_graph)
- {
- auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index);
- const auto &def_factors = lower_info->def_factors();
- if (def_factors.size() > 0)
- {
- label += "\\n[";
- label += def_factors.getOnlyElement().backend()->config()->id();
- label += "]";
-
- fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
- }
- }
- node->setAttribute("label", label);
- node->setAttribute("fillcolor", fillcolor);
- }
-
- operand_nodes.emplace(index, std::move(node));
+ dot_operands.emplace(index, std::move(node));
}
});
+ return dot_operands;
+}
+
+ir::OperationIndexMap<std::unique_ptr<Operation>>
+generate_dot_operations(const ir::Graph &graph,
+ const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands)
+{
+ ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations;
+ const auto &operations = graph.operations();
operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
auto node = std::make_unique<Operation>(index, op);
@@ -130,42 +106,79 @@ void DotDumper::dump(const std::string &tag)
using onert::dumper::dot::Operand;
// Constant input and dump level is ALL_BUT_CONSTANTS
- if (operand_nodes.find(input) == operand_nodes.end())
+ if (dot_operands.find(input) == dot_operands.end())
continue;
- auto &input_node = operand_nodes.at(input);
+ auto &input_node = dot_operands.at(input);
input_node->addOutEdge(node.get());
}
for (auto output : op.getOutputs() | ir::Remove::UNDEFINED)
{
using onert::dumper::dot::Operand;
- auto &output_node = operand_nodes.at(output);
+ auto &output_node = dot_operands.at(output);
node->addOutEdge(output_node.get());
}
- operation_nodes.emplace(index, std::move(node));
+ dot_operations.emplace(index, std::move(node));
});
- if (_lowered_graph)
- {
- _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
- const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index);
- if (lower_info)
+ return dot_operations;
+}
+
+void update_lower_info(const compiler::LoweredGraph &lowered_graph,
+ ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands)
+{
+ const auto &operands = lowered_graph.graph().operands();
+ operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ auto itr = dot_operands->find(index);
+ if (itr != dot_operands->end())
+ {
+ auto &node = itr->second;
+ // Display LowerInfo attributes
+ std::string label = node->getAttribute("label");
+ std::string fillcolor = node->getAttribute("fillcolor");
+ auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index);
+ const auto &def_factors = lower_info->def_factors();
+ if (def_factors.size() > 0)
{
- auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
- auto itr = operation_nodes.find(index);
- if (itr != operation_nodes.end())
- {
- auto &node = itr->second;
- node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
- node->setAttribute("fillcolor", fillcolor);
- }
+ label += "\\n[";
+ label += def_factors.getOnlyElement().backend()->config()->id();
+ label += "]";
+ fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
}
- });
- }
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ });
+}
+void update_lower_info(const compiler::LoweredGraph &lowered_graph,
+ ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations)
+{
+ const auto &operations = lowered_graph.graph().operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index);
+ if (lower_info)
+ {
+ auto fillcolor = backend_to_fillcolor(lower_info->backend());
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = dot_operations->find(index);
+ if (itr != dot_operations->end())
+ {
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ }
+ });
+}
+
+void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes,
+ const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes,
+ const std::string &tag)
+{
+ onert::dumper::dot::DotBuilder dot_builder;
for (const auto &e : operation_nodes)
dot_builder.update(*e.second);
for (const auto &e : operand_nodes)
@@ -186,6 +199,33 @@ void DotDumper::dump(const std::string &tag)
fb.close();
}
}
+} // namespace
+
+void DotDumper::dump(const ir::Graph &graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ const auto dot_operands = generate_dot_operands(graph, _level);
+ const auto dot_operations = generate_dot_operations(graph, dot_operands);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
+
+void DotDumper::dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level);
+ auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands);
+ update_lower_info(lowered_graph, &dot_operands);
+ update_lower_info(lowered_graph, &dot_operations);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
} // namespace dot
} // namespace dumper
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index f300c3432..6249010d3 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -38,27 +38,28 @@ public:
};
public:
- DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
- {
- }
- DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
- {
- }
+ DotDumper(Level level) : _level{level} {}
public:
/**
- * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ *
+ * @param[in] graph The graph that would be used to get operations and operands
+ * @param[in] tag The name of dot file that would be created
+ * @return N/A
+ */
+ void dump(const ir::Graph &graph, const std::string &tag);
+
+ /**
+ * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
*
+ * @param[in] lowered_graph The lowered graph that would be used to get operations and operands
* @param[in] tag The name of dot file that would be created
* @return N/A
*/
- void dump(const std::string &tag);
+ void dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag);
private:
- const compiler::LoweredGraph *_lowered_graph;
- const ir::Graph &_graph;
Level _level;
};
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index bcac19d2e..1649be733 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -17,19 +17,18 @@
#ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
#define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
-#include <list>
-#include <map>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
+#include "ExecutorBase.h"
#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include <memory>
-#include "exec/ExecutorBase.h"
+
#include "compiler/CodeMap.h"
+#include "ir/OperandIndexSequence.h"
#include "util/TracingCtx.h"
+#include <list>
+#include <map>
+#include <memory>
+#include <unordered_map>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc
index 6bf2744a9..4b82655b9 100644
--- a/runtime/onert/core/src/exec/ExecTime.cc
+++ b/runtime/onert/core/src/exec/ExecTime.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "exec/ExecTime.h"
+#include "ExecTime.h"
-#include <fstream>
-#include <cassert>
-#include <limits>
#include <algorithm>
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc
new file mode 100644
index 000000000..1f7152e7b
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecTime.test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecTime.h"
+
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+namespace
+{
+using namespace onert;
+using namespace exec;
+using namespace backend;
+
+struct MockConfig : public IConfig
+{
+ std::string id() override { return "b1"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackend : public ::onert::backend::Backend
+{
+ std::shared_ptr<onert::backend::IConfig> config() const override
+ {
+ return std::make_shared<MockConfig>();
+ }
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
+ {
+ return nullptr;
+ }
+};
+
+TEST(ExecTime, roundtrip_ok)
+{
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.updateOperationExecTime(b, "op1", false, 100, 888);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 150);
+ ASSERT_EQ(time, 150);
+ time = et.getOperationExecTime(b, "op1", false, 100);
+ ASSERT_EQ(time, 888);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+
+TEST(ExecTime, structure)
+{
+
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 200);
+ ASSERT_EQ(time, 200);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+} // unnamed namespace
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 8eff73bac..9d1e06d6c 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -23,13 +23,12 @@ namespace onert
namespace exec
{
-Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors}
{
assert(executors != nullptr);
assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
- const auto &primary_subg = primary_subgraph();
- _io_desc.inputs.resize(primary_subg.getInputs().size());
- _io_desc.outputs.resize(primary_subg.getOutputs().size());
+ _io_desc.inputs.resize(_executors->inputSize());
+ _io_desc.outputs.resize(_executors->outputSize());
sem_init(&_async_io_descs_sem, 0, 1);
}
@@ -48,8 +47,7 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto info = _executors->inputInfo(index);
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor
@@ -105,8 +103,7 @@ bool Execution::isEmptyQueue()
void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto info = _executors->inputInfo(index);
IODescription *_async_io_desc = _async_io_descs.back().first;
{
@@ -135,8 +132,7 @@ void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer,
void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto info = _executors->outputInfo(index);
IODescription *_async_io_desc = _async_io_descs.front().first;
if (length < info.total_size())
@@ -165,8 +161,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con
// TODO Remove default parameter
void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto info = _executors->outputInfo(index);
if (length < info.total_size())
{
@@ -208,7 +203,7 @@ void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;
- primary_executor()->execute(_io_desc);
+ _executors->execute(_io_desc);
finished = true;
VERBOSE(Execution) << "Execution finished" << std::endl;
@@ -248,8 +243,7 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind);
- return primary_subgraph().operands().at(operand_idx).shape();
+ return _executors->inputInfo(ind).shape();
}
else
{
diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc
new file mode 100644
index 000000000..e3ea49470
--- /dev/null
+++ b/runtime/onert/core/src/exec/Execution.test.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Execution.h"
+
+#include "compiler/Compiler.h"
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+#include "util/TracingCtx.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+
+using namespace onert::ir;
+
+class CompiledMockUpModel
+{
+public:
+ CompiledMockUpModel()
+ {
+ // Model: two elementwise add operations
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs1)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ graph = std::make_shared<Graph>();
+ // 1st add operands (result1 <= lhs + rhs1)
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ static float rhs2_data[4] = {3, 1, -1, 5};
+ auto operand_lhs = graph->addOperand(shape, type);
+ auto operand_rhs1 = graph->addOperand(shape, type);
+ auto operand_result1 = graph->addOperand(shape, type);
+ auto operand_rhs2 = graph->addOperand(shape, type);
+ auto operand_result2 = graph->addOperand(shape, type);
+ graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ // 2nd add operations (result2 <= result1 + rhs2)
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ // Identify model inputs and outputs
+ graph->addInput(operand_lhs);
+ graph->addInput(operand_rhs1);
+ graph->addOutput(operand_result2);
+ graph->verify();
+
+ // Compile
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ artifact = compiler.compile();
+ }
+
+public:
+ std::shared_ptr<Graph> graph;
+ std::unique_ptr<onert::compiler::CompilerOptions> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+};
+
+TEST(ExecInstance, simple)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, twoCompile)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile();
+ onert::exec::Execution execution2{artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, twoExecution)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+class Inference
+{
+public:
+ Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
+ std::shared_ptr<onert::exec::Executors> &executors)
+ : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
+ {
+ // DO NOTHING
+ }
+
+ void inference(void)
+ {
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ onert::exec::Execution execution{_executors};
+ execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
+
+ execution.execute();
+ }
+
+private:
+ const float (&_input1)[4];
+ const float (&_input2)[4];
+ float (&_output)[4];
+ std::shared_ptr<onert::exec::Executors> &_executors;
+};
+
+// Support multi-thread execution
+TEST(ExecInstance, twoThreads)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
+
+ std::thread t1{&Inference::inference, &execution1};
+ std::thread t2{&Inference::inference, &execution2};
+
+ t1.join();
+ t2.join();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support asynchronous execution
+TEST(ExecInstance, async)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+} // namespace
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
index 423b5026b..3ee1754c9 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.h
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -17,11 +17,12 @@
#ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__
#define __ONERT_EXEC_EXECUTION_OBSERVEE_H__
-#include <list>
+#include "ExecutionObservers.h"
-#include "exec/ExecutionObservers.h"
#include "ir/Index.h"
+#include <list>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 386178ae6..9abde7ba4 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "exec/ExecutionObservers.h"
+#include "ExecutionObservers.h"
-#include <string>
-#include <sstream>
+#include "../util/EventWriter.h"
#include "util/logging.h"
-#include "exec/IExecutor.h"
-#include "misc/polymorphic_downcast.h"
-#include "ir/Operation.h"
-#include "util/EventWriter.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <string>
+#include <sstream>
namespace
{
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index 4c6c7b18e..1aadac2f5 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -17,17 +17,16 @@
#ifndef __ONERT_EXEC_OBSREVERS_H__
#define __ONERT_EXEC_OBSREVERS_H__
-#include "exec/IFunction.h"
+#include "ExecTime.h"
+#include "../util/EventCollector.h"
+#include "../util/EventRecorder.h"
+#include "../util/EventWriter.h"
+
+#include "exec/Executors.h"
#include "ir/Index.h"
#include "ir/Operation.h"
-#include "ExecTime.h"
#include "util/ITimer.h"
-#include "exec/IExecutor.h"
-#include "util/EventCollector.h"
-#include "util/EventRecorder.h"
-#include "util/EventWriter.h"
#include "util/TracingCtx.h"
-#include "util/EventWriter.h"
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index efc22cfa5..d2d204a0b 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -15,11 +15,10 @@
*/
#include "ExecutorBase.h"
+
#include "ShapeConverter.h"
-#include "backend/builtin/UserTensor.h"
-#include "util/logging.h"
-#include "misc/polymorphic_downcast.h"
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index c0f609d11..e4f914546 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -17,22 +17,17 @@
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include "IPermuteFunction.h"
+#include "ExecutionObservee.h"
+#include "../backend/builtin/IOTensor.h"
+#include "../compiler/TensorRegistries.h"
+
+#include "compiler/LoweredGraph.h"
#include "exec/IExecutor.h"
-#include "exec/ExecTime.h"
-#include "exec/ExecutionObservee.h"
-#include "exec/IFunction.h"
#include "exec/IODescription.h"
#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "compiler/GraphLowerInfo.h"
#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/builtin/IOTensor.h"
#include "util/TracingCtx.h"
-#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc
new file mode 100644
index 000000000..e0ee24fea
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Executors.h"
+
+namespace onert
+{
+namespace exec
+{
+
+uint32_t Executors::inputSize() const
+{
+ return _model_edges ? _model_edges->pkg_inputs.size()
+ : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size();
+}
+
+uint32_t Executors::outputSize() const
+{
+ return _model_edges ? _model_edges->pkg_outputs.size()
+ : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size();
+}
+
+const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index)
+{
+ if (_model_edges)
+ {
+ // Assume that each model may have only one subgraph
+ // TODO handle general case
+ const auto desc = _model_edges->pkg_inputs[index.value()];
+ const auto model_idx = std::get<0>(desc);
+ const auto executor_idx = ir::SubgraphIndex{model_idx.value()};
+ const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc));
+ return _executors.at(executor_idx)->graph().operands().at(input_index).info();
+ }
+
+ const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index);
+ return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info();
+}
+
+const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index)
+{
+ if (_model_edges)
+ {
+ // Assume that each model may have only one subgraph
+ // TODO handle general case
+ auto desc = _model_edges->pkg_outputs[index.value()];
+ auto model_idx = std::get<0>(desc);
+ auto executor_idx = ir::SubgraphIndex{model_idx.value()};
+ auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc));
+ return _executors.at(executor_idx)->graph().operands().at(output_index).info();
+ }
+
+ auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index);
+ return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info();
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ if (_model_edges)
+ return executeEntries(desc);
+
+ _executors.at(ir::SubgraphIndex{0})->execute(desc);
+}
+
+void Executors::executeEntries(const IODescription &desc)
+{
+ // Assume 2 executors only
+ // Assume that each model may have only one subgraph
+ // TODO Support general case
+ if (_executors.size() != 2)
+ throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"};
+
+ // Assume all edges are 0:0:x -> 1:0:x
+ for (auto edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) ||
+ (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) ||
+ (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to)))
+ throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"};
+ }
+
+ // Assume all package inputs are 0:0:x
+ for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++)
+ {
+ auto input = _model_edges->pkg_inputs[i];
+ if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) ||
+ (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(input) != ir::IOIndex{i}))
+ {
+ throw std::runtime_error{"NYI: Support package input to 1st model with same order"};
+ }
+ }
+
+ // Assume all package outputs are 1:0:x
+ for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++)
+ {
+ auto output = _model_edges->pkg_outputs[i];
+ if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) ||
+ (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(output) != ir::IOIndex{i}))
+ {
+ throw std::runtime_error{"NYI: Support package output from 2nd model with same order"};
+ }
+ }
+
+ const auto &executor1 = _executors.at(ir::SubgraphIndex{0});
+ const auto &graph1 = executor1->graph();
+ const auto &executor2 = _executors.at(ir::SubgraphIndex{1});
+ const auto &graph2 = executor2->graph();
+
+ if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) ||
+ (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) ||
+ (graph1.getOutputs().size() != graph2.getInputs().size()) ||
+ (graph1.getOutputs().size() != _model_edges->edges.size()))
+ {
+ throw std::runtime_error{"NYI: Unsupported model edge pattern"};
+ }
+
+ // Prepare buffer
+ // Assume buffer layout is NHWC
+ std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size());
+ std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size());
+ const auto layout = ir::Layout::NHWC;
+
+ for (uint32_t i = 0; i < graph1.getOutputs().size(); i++)
+ {
+ const auto buf_index =
+ _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i});
+ buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info();
+ const auto buf_size = buf_infos[i]->total_size();
+ bufs[i] = std::make_unique<uint8_t[]>(buf_size);
+ }
+
+ // 1st executor
+ {
+ IODescription desc1;
+ const auto input_size = graph1.getInputs().size();
+ const auto output_size = graph1.getOutputs().size();
+ desc1.inputs.resize(input_size);
+ desc1.outputs.resize(output_size);
+ for (uint32_t i = 0; i < input_size; i++)
+ desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get());
+ for (uint32_t i = 0; i < output_size; i++)
+ desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(),
+ buf_infos[i]->total_size(), layout);
+
+ executor1->execute(desc1);
+ }
+
+ // 2nd executor
+ {
+ IODescription desc2;
+ const auto input_size = graph2.getInputs().size();
+ const auto output_size = graph2.getOutputs().size();
+ desc2.inputs.resize(input_size);
+ desc2.outputs.resize(output_size);
+ for (uint32_t i = 0; i < input_size; i++)
+ desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(),
+ buf_infos[i]->total_size(), layout);
+ for (uint32_t i = 0; i < output_size; i++)
+ desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get());
+
+ executor2->execute(desc2);
+ }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index df68b1b64..f87c271f7 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -34,9 +34,7 @@ void FunctionSequence::run()
// Thus, those two bakends cannot reach here.
// Do dynamic shape inference
- auto op_ind = _dynamic_tensor_ctx->op_ind;
- auto &op = _dynamic_tensor_ctx->operations->at(op_ind);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
+ _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
for (const auto &function : _functions)
{
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
index b29216a2f..d149345fd 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.cc
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "exec/JSONExecTime.h"
-#include "backend/IConfig.h"
+#include "JSONExecTime.h"
+
#include <fstream>
namespace onert
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index 39d653154..a833466da 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -22,11 +22,10 @@
#ifndef __ONERT_EXEC_EXECUTOR_H_
#define __ONERT_EXEC_EXECUTOR_H_
-#include "ir/Index.h"
#include "ExecutorBase.h"
-#include "compiler/Linear.h"
-#include "exec/FunctionSequence.h"
+
#include "compiler/CodeMap.h"
+#include "ir/Index.h"
#include "util/TracingCtx.h"
namespace onert
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 7f107fa22..7d459b0b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -17,19 +17,13 @@
#ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__
#define __ONERT_EXEC_PARALLEL_EXECUTOR_H__
-#include <list>
-#include <queue>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include <memory>
-#include "exec/DataflowExecutor.h"
+#include "DataflowExecutor.h"
#include "ParallelScheduler.h"
+
#include "util/TracingCtx.h"
+#include <memory>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h
new file mode 100644
index 000000000..1d2d375e2
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/MockTensor.h
@@ -0,0 +1,66 @@
+
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/ITensor.h"
+
+template <typename T> class MockTensor : public onert::backend::ITensor
+{
+public:
+ MockTensor<T>(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout)
+ : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout)
+ {
+ }
+
+public:
+ uint8_t *buffer() const override { return _buf; }
+
+ size_t calcOffset(const onert::ir::Coordinates &coords) const override
+ {
+ size_t rank = _shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = _shape.rank() == 0 ? 1 : _shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeof(T);
+
+ return offset;
+ }
+
+ onert::ir::Shape getShape() const override { return _shape; }
+
+public: // DUMMY methods
+ size_t total_size() const override { return 0; }
+ onert::ir::Layout layout() const override { return _layout; }
+ onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; }
+ float data_scale() const override { return 0; }
+ int32_t data_zero_point() const override { return 0; }
+ const std::vector<float> &data_scales() const override { return _dummy_scales; }
+ const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; }
+ bool has_padding() const override { return false; }
+ void access(const std::function<void(ITensor &tensor)> &fn) override {}
+ bool is_dynamic() const override { return false; }
+
+private:
+ uint8_t *_buf = nullptr;
+ onert::ir::Shape _shape;
+ onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN;
+ std::vector<float> _dummy_scales;
+ std::vector<int32_t> _dummy_zerops;
+};
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
new file mode 100644
index 000000000..f439cafb5
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _reader = std::make_shared<nchw::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nchw, ReaderTypes);
+
+TYPED_TEST(Reader_nchw, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
new file mode 100644
index 000000000..c6dcda710
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _view = std::make_shared<nchw::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nchw::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nchw, ViewTypes);
+
+TYPED_TEST(View_nchw, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
new file mode 100644
index 000000000..773199042
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes);
+
+
+TYPED_TEST(Reader_nhwc, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index 40d1d237c..c98d050c3 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
#define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include <cassert>
#include <cstddef>
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
new file mode 100644
index 000000000..bdd73d5a7
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _view = std::make_shared<nhwc::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nhwc, ViewTypes);
+
+
+TYPED_TEST(View_nhwc, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
index 44d1575d7..f04777174 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.cc
+++ b/runtime/onert/core/src/interp/InterpExecutor.cc
@@ -14,9 +14,10 @@
* limitations under the License.
*/
-#include "interp/InterpExecutor.h"
-#include "interp/ExecEnv.h"
-#include "interp/Interpreter.h"
+#include "InterpExecutor.h"
+
+#include "ExecEnv.h"
+#include "Interpreter.h"
#include "util/logging.h"
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
index df6153d09..d6d5dd0a3 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ b/runtime/onert/core/src/interp/InterpExecutor.h
@@ -74,7 +74,12 @@ public:
}
private:
- const ir::Graph &_graph;
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses copy of graph, not reference.
+ * Original graph may be deallocated by frontend.
+ */
+ const ir::Graph _graph;
ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
};
diff --git a/runtime/onert/core/src/interp/InterpExecutor.test.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc
new file mode 100644
index 000000000..9f95ffee0
--- /dev/null
+++ b/runtime/onert/core/src/interp/InterpExecutor.test.cc
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InterpExecutor.h"
+
+#include "exec/Execution.h"
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+namespace
+{
+
+using namespace onert::ir;
+using InterpExecutor = onert::interp::InterpExecutor;
+using Execution = onert::exec::Execution;
+using Executors = onert::exec::Executors;
+
+class InterpExecutorTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp() {}
+ void CreateSimpleModel()
+ {
+ // Model: one elementwise add operation
+ // model input: lhs, rhs
+ // model output: add result
+ // lhs, rhs, result shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // Add operands
+
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs = _graph->addOperand(shape, type);
+ auto operand_result = _graph->addOperand(shape, type);
+
+ // Add operations
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
+ auto output_set = OperandIndexSequence{operand_result};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs);
+ _graph->getOutputs().append(operand_result);
+
+ _graph->verify();
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
+
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
+ }
+
+ void CreateTwoStepModel()
+ {
+ // Model: two elementwise add operation
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // 1st add operands (result1 <= lhs + rhs1)
+
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ static int32_t rhs2_data[4] = {3, 1, -1, 5};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs1 = _graph->addOperand(shape, type);
+ auto operand_result1 = _graph->addOperand(shape, type);
+ auto operand_rhs2 = _graph->addOperand(shape, type);
+ auto operand_result2 = _graph->addOperand(shape, type);
+ _graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+
+ // 2nd add operations (result2 <= result1 + rhs2)
+
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs1);
+ _graph->getOutputs().append(operand_result2);
+
+ _graph->verify();
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
+
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
+ }
+
+ void CreateUnspecifiedDimensionsModel()
+ {
+ // Model: one elementwise add operation
+ // model input: lhs, rhs
+ // model output: add result
+ // lhs, rhs, result shape: {1, unknown, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // Add operands
+
+ Shape shape{1, 0, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs = _graph->addOperand(shape, type);
+
+ auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
+ _graph->operands()
+ .at(operand_activation)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+
+ auto operand_result = _graph->addOperand(shape, type);
+
+ // Add operations
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
+ auto output_set = OperandIndexSequence{operand_result};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs);
+ _graph->getOutputs().append(operand_result);
+
+ _graph->verify();
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
+
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
+ }
+
+ void createExecution() { _execution = std::make_unique<Execution>(_executors); }
+
+ virtual void TearDown() { _executors = nullptr; }
+
+ std::shared_ptr<Graph> _graph{nullptr};
+ std::shared_ptr<Executors> _executors{nullptr};
+ std::unique_ptr<Execution> _execution{nullptr};
+ const int32_t _activation_value{0};
+};
+
+TEST_F(InterpExecutorTest, create_empty)
+{
+ Graph graph;
+ graph.verify();
+ auto executor = std::make_unique<InterpExecutor>(graph);
+ ASSERT_NE(executor, nullptr);
+}
+
+TEST_F(InterpExecutorTest, create_simple)
+{
+ CreateSimpleModel();
+ ASSERT_NE(_executors, nullptr);
+ ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
+}
+
+TEST_F(InterpExecutorTest, neg_setInput)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+
+ EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, neg_setOutput)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
+{
+ CreateUnspecifiedDimensionsModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+
+ TypeInfo operand_type{DataType::INT32};
+ Shape operand_shape{1, 2, 2, 1};
+
+ EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
+{
+ CreateUnspecifiedDimensionsModel();
+ createExecution();
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ TypeInfo operand_type{DataType::INT32};
+ Shape operand_shape{1, 2, 2, 1};
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, execute)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto input1_idx = _graph->getInputs().at(input1);
+ auto input2_idx = _graph->getInputs().at(input2);
+
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+ const int32_t input2_buffer[4] = {1, -3, 2, -4};
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+ EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+ EXPECT_NO_THROW(_execution->execute());
+ EXPECT_EQ(output_buffer[0], 2);
+ EXPECT_EQ(output_buffer[1], -3);
+ EXPECT_EQ(output_buffer[2], 1);
+ EXPECT_EQ(output_buffer[3], -6);
+}
+
+TEST_F(InterpExecutorTest, executeTwoStep)
+{
+ CreateTwoStepModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto input1_idx = _graph->getInputs().at(input1);
+ auto input2_idx = _graph->getInputs().at(input2);
+
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+ const int32_t input2_buffer[4] = {1, -3, 2, -4};
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+ EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+ EXPECT_NO_THROW(_execution->execute());
+ EXPECT_EQ(output_buffer[0], 5);
+ EXPECT_EQ(output_buffer[1], -2);
+ EXPECT_EQ(output_buffer[2], 0);
+ EXPECT_EQ(output_buffer[3], -1);
+}
+
+} // namespace
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
index 804e9fb51..fe4acd309 100644
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include <cker/operation/BinaryArithmeticOps.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/BinaryArithmetic.h"
-#include "misc/polymorphic_downcast.h"
-#include "cker/Types.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+#include <cker/Types.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
index a063ab14a..103604631 100644
--- a/runtime/onert/core/src/interp/operations/Concat.cc
+++ b/runtime/onert/core/src/interp/operations/Concat.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Concatenation.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Concat.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/Concatenation.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
index 0b43a4799..72c2057c2 100644
--- a/runtime/onert/core/src/interp/operations/Conv2D.cc
+++ b/runtime/onert/core/src/interp/operations/Conv2D.cc
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#include <cker/operation/Conv.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Conv2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
+#include "util/Utils.h"
+
+#include <cker/operation/Conv.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
index d1c62d73f..9f527440e 100644
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/DepthwiseConv2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
+#include "util/Utils.h"
+
+#include <cker/operation/DepthwiseConv.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
index 197855ff4..e13080e76 100644
--- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
+++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
@@ -14,17 +14,16 @@
* limitations under the License.
*/
-#include <cmath>
-
#include "OperationUtil.h"
-
-#include "interp/Registration.h"
+#include "../Registration.h"
#include "ir/operation/ElementwiseActivation.h"
-#include <misc/polymorphic_downcast.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/Tanh.h>
+#include <misc/polymorphic_downcast.h>
+
+#include <cmath>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
index ef827605b..2bc9f517f 100644
--- a/runtime/onert/core/src/interp/operations/FullyConnected.cc
+++ b/runtime/onert/core/src/interp/operations/FullyConnected.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/FullyConnected.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/FullyConnected.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/FullyConnected.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
index 0ea60875c..d686cfcf6 100644
--- a/runtime/onert/core/src/interp/operations/Gather.cc
+++ b/runtime/onert/core/src/interp/operations/Gather.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Gather.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Gather.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/Gather.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
index b5c38819d..318088457 100644
--- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc
+++ b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/InstanceNorm.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/InstanceNorm.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/InstanceNorm.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
index 0eec7fe9a..3db0828eb 100644
--- a/runtime/onert/core/src/interp/operations/Pad.cc
+++ b/runtime/onert/core/src/interp/operations/Pad.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Pad.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Pad.h"
+#include <cker/operation/Pad.h>
+
namespace onert
{
namespace interp
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
index 2f3b71655..3935d4756 100644
--- a/runtime/onert/core/src/interp/operations/Pool2D.cc
+++ b/runtime/onert/core/src/interp/operations/Pool2D.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Pool2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
+#include "util/Utils.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
index 3a118456b..1de5a5762 100644
--- a/runtime/onert/core/src/interp/operations/Reshape.cc
+++ b/runtime/onert/core/src/interp/operations/Reshape.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "interp/Registration.h"
+#include "../Registration.h"
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
index 1fc303117..8be2f2210 100644
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ b/runtime/onert/core/src/interp/operations/Softmax.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/SoftMax.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Softmax.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/SoftMax.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
index 755103dc2..59c8e8cdf 100644
--- a/runtime/onert/core/src/interp/operations/TransposeConv.cc
+++ b/runtime/onert/core/src/interp/operations/TransposeConv.cc
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/TransposeConv.h"
+#include <cker/operation/TransposeConv.h>
+#include <misc/polymorphic_downcast.h>
+
namespace onert
{
namespace interp
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index df30bbdbe..28cf4137d 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -17,19 +17,9 @@
#include "ir/Graph.h"
#include "OperationValidator.h"
+#include "verifier/Verifier.h"
-#include <algorithm>
-
-#include <bitset>
-#include <sstream>
-
-#include "util/logging.h"
#include "util/Set.h"
-#include "verifier/Verifier.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperationIndexMap.h"
-#include "dumper/text/GraphDumper.h"
-#include "backend/IConfig.h"
namespace onert
{
@@ -38,6 +28,8 @@ namespace ir
Graph::Graph() = default;
+Graph::Graph(const Graph &) = default;
+
Graph::~Graph(void) = default;
OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
diff --git a/runtime/onert/core/src/ir/Graph.test.cc b/runtime/onert/core/src/ir/Graph.test.cc
new file mode 100644
index 000000000..144500745
--- /dev/null
+++ b/runtime/onert/core/src/ir/Graph.test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <gtest/gtest.h>
+
+TEST(Graph, neg_inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+
+ EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
+}
+
+using namespace onert::ir;
+
+OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
+ const OperandIndexSequence outputs)
+{
+ // Add "ADD" operation
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
+}
+
+TEST(Graph, OneOpGraphSimpleValid)
+{
+ // Simple Graph with just one Add operation
+
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ addAddOperation(graph, {lhs, rhs}, {res});
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ graph.verify();
+
+ SUCCEED();
+}
+
+TEST(Graph, neg_InvalidGraph_BadInput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addInput(OperandIndex{89}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidGraph_BadOutput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addOutput(OperandIndex{12}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
+}
diff --git a/runtime/onert/core/src/ir/LayoutSet.test.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc
new file mode 100644
index 000000000..fc956abe8
--- /dev/null
+++ b/runtime/onert/core/src/ir/LayoutSet.test.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayoutSet.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(ir_LayoutSet, neg_add_remove)
+{
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(ir_LayoutSet, neg_add_twice)
+{
+ LayoutSet set;
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+}
+
+TEST(ir_LayoutSet, set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
diff --git a/runtime/onert/test/core/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h
index 0e7ed977b..0e7ed977b 100644
--- a/runtime/onert/test/core/ir/MockNode.h
+++ b/runtime/onert/core/src/ir/MockNode.h
diff --git a/runtime/onert/core/src/ir/Operand.test.cc b/runtime/onert/core/src/ir/Operand.test.cc
new file mode 100644
index 000000000..0b858792a
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operand.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+
+#include "MockNode.h"
+#include "verifier/Verifier.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(ir_Operand, neg_usedef)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3(two input)
+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.verify();
+
+ ASSERT_TRUE(verifier.verify(graph));
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
+
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.test.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
new file mode 100644
index 000000000..588c4e419
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandIndexSequence.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(ir_OperandIndexSequence, neg_append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, neg_replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
diff --git a/runtime/onert/core/src/ir/Operands.test.cc b/runtime/onert/core/src/ir/Operands.test.cc
new file mode 100644
index 000000000..aff228b10
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operands.test.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operands.h"
+
+#include <gtest/gtest.h>
+
+TEST(ir_Operands, neg_set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
diff --git a/runtime/onert/core/src/ir/Operation.test.cc b/runtime/onert/core/src/ir/Operation.test.cc
new file mode 100644
index 000000000..b3c4e852d
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operation.test.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Concat.h"
+#include "ir/operation/Conv2D.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(ir_Operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(ir_Operation_setIO, neg_operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/Operations.test.cc b/runtime/onert/core/src/ir/Operations.test.cc
new file mode 100644
index 000000000..e57872689
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operations.test.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operations.h"
+
+#include "MockNode.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(ir_Operations, basic)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
+
+TEST(ir_Operations, neg_at)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{99u};
+ EXPECT_THROW(ops.at(idx), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc
new file mode 100644
index 000000000..afdb29254
--- /dev/null
+++ b/runtime/onert/core/src/ir/Shape.test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Shape.h"
+
+#include <gtest/gtest.h>
+
+TEST(ShapeTest, basic_test)
+{
+ {
+ onert::ir::Shape shape(3);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = 2;
+ shape.dim(2) = 3;
+
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.num_elements(), 6);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+ {
+ onert::ir::Shape shape; // scalar or rank is unspecified
+
+ ASSERT_EQ(shape.rank(), 0);
+ ASSERT_EQ(shape.num_elements(), 1);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+}
+
+TEST(ShapeTest, neg_basic_test)
+{
+ {
+ onert::ir::Shape shape(2);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), true);
+ EXPECT_ANY_THROW(shape.num_elements());
+ }
+}
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.test.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
new file mode 100644
index 000000000..1ec71cd55
--- /dev/null
+++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Verifier.h"
+
+#include "../MockNode.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
index 3fc0c8ece..d868efedf 100644
--- a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
+++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <sstream>
-#include <vector>
#include <cassert>
+#include <sstream>
#include <utility>
+#include <vector>
// json type for ChromeTracingWriter
namespace
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
index 9da93f68a..b7fcefc7a 100644
--- a/runtime/onert/core/src/util/ConfigSource.cc
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -15,13 +15,15 @@
*/
#include "util/ConfigSource.h"
-#include "util/GeneralConfigSource.h"
-#include "util/EnvConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
-#include <array>
#include <algorithm>
+#include <array>
#include <cassert>
-
#include <memory>
namespace onert
@@ -29,12 +31,27 @@ namespace onert
namespace util
{
+using namespace nnfw::misc;
+
static std::unique_ptr<IConfigSource> _source;
static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
+
static IConfigSource *config_source()
{
if (!_source)
diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/EnvConfigSource.cc
deleted file mode 100644
index 0d25b7353..000000000
--- a/runtime/onert/core/src/util/EnvConfigSource.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EnvConfigSource.h"
-
-#include <cstdlib>
-
-namespace onert
-{
-namespace util
-{
-
-std::string EnvConfigSource::get(const std::string &key) const
-{
- const char *value = std::getenv(key.c_str());
- if (value != nullptr)
- {
- return value;
- }
- else
- {
- return GeneralConfigSource::get(key);
- }
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc
index 83c2649d1..c1b9c4315 100644
--- a/runtime/onert/core/src/util/EventCollector.cc
+++ b/runtime/onert/core/src/util/EventCollector.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventCollector.h"
+#include "EventCollector.h"
// C++ standard libraries
#include <chrono>
diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h
index 774fe05ef..effb72373 100644
--- a/runtime/onert/core/src/util/EventCollector.h
+++ b/runtime/onert/core/src/util/EventCollector.h
@@ -17,12 +17,13 @@
#ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
+
#include "util/TracingCtx.h"
-#include <vector>
-#include <utility>
#include <string>
+#include <utility>
+#include <vector>
class EventCollector
{
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index 5d3d5f5c6..85a588d38 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
index c42c53730..ca4bd302e 100644
--- a/runtime/onert/core/src/util/EventWriter.cc
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
#include <cassert>
diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/util/GeneralConfigSource.cc
deleted file mode 100644
index 7d2757e58..000000000
--- a/runtime/onert/core/src/util/GeneralConfigSource.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/GeneralConfigSource.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace util
-{
-
-std::string GeneralConfigSource::get(const std::string &key) const
-{
- auto itr = _map.find(key);
- if (itr == _map.end())
- {
- return "";
- }
- else
- {
- return itr->second;
- }
-}
-
-void GeneralConfigSource::set(const std::string &key, const std::string &val)
-{
- VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
- _map[key] = val;
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/Index.test.cc b/runtime/onert/core/src/util/Index.test.cc
new file mode 100644
index 000000000..ff73e5e59
--- /dev/null
+++ b/runtime/onert/core/src/util/Index.test.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+
+#include <gtest/gtest.h>
+
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
+
+TEST(Index, neg_index_test)
+{
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
+
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc
index b7fbac5e2..7a8b9f234 100644
--- a/runtime/onert/core/src/util/MDTableEventWriter.cc
+++ b/runtime/onert/core/src/util/MDTableEventWriter.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
#include <cassert>
-#include <utility>
#include <map>
#include <set>
+#include <sstream>
#include <stdint.h>
+#include <unordered_map>
+#include <utility>
+#include <vector>
// md table type
namespace
diff --git a/runtime/onert/core/src/util/ObjectManager.test.cc b/runtime/onert/core/src/util/ObjectManager.test.cc
new file mode 100644
index 000000000..3fe735732
--- /dev/null
+++ b/runtime/onert/core/src/util/ObjectManager.test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+#include "util/ObjectManager.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert;
+
+struct TestTag;
+using Index = typename util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, neg_remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, neg_remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Not specify index
+ auto index = man.push(std::make_unique<int>(100));
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify index
+ auto index2 = man.push(std::make_unique<int>(200), Index{33});
+ ASSERT_EQ(index2.value(), 33);
+ ASSERT_EQ(man.at(index2), 200);
+
+ auto index3 = man.push(std::make_unique<int>(300));
+ // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index3.value(), 34);
+ ASSERT_EQ(man.at(index3), 300);
+
+ auto index4 = man.push(std::make_unique<int>(400), Index{22});
+ ASSERT_EQ(index4.value(), 22);
+ ASSERT_EQ(man.at(index4), 400);
+
+ auto index5 = man.push(std::make_unique<int>(500));
+ // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index5.value(), 35);
+ ASSERT_EQ(man.at(index5), 500);
+}
+
+TEST(ObjectManager, neg_push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Specify index
+ auto index = man.push(std::make_unique<int>(100), Index{55});
+ ASSERT_EQ(index.value(), 55);
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify the same index
+ auto index2 = man.push(std::make_unique<int>(200), Index{55});
+ ASSERT_FALSE(index2.valid());
+}
+
+static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
+
+TEST(ObjectManager, neg_push_undefined_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Try inserting invalid(undefined) index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
+ ASSERT_FALSE(index.valid());
+ ASSERT_EQ(man.size(), 0);
+}
+
+TEST(ObjectManager, neg_push_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached to the final index so next push/emplace must fail
+ auto index2 = man.push(std::make_unique<int>(200));
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index2.valid());
+}
+
+TEST(ObjectManager, neg_emplace_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached to the final index so next push/emplace must fail
+ auto index3 = man.emplace(200);
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index3.valid());
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
+
+TEST(ObjectManager, set)
+{
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
+ ASSERT_EQ(index, Index{1});
+ auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
+ ASSERT_EQ(index2, index);
+ ASSERT_EQ(man.at(index2), 200);
+}
+
+TEST(ObjectManager, neg_set)
+{
+ auto v = std::make_unique<int>(100);
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index
+ ASSERT_EQ(index, Index{});
+ ASSERT_FALSE(index.valid());
+ ASSERT_NE(v, nullptr); // v must be kept when failure
+}
+
+TEST(ObjectManager, getRawPtr)
+{
+ auto v = std::make_unique<int>(100);
+ auto v_ptr = v.get();
+ util::ObjectManager<Index, int> man;
+ auto index = man.push(std::move(v));
+ ASSERT_EQ(v_ptr, man.getRawPtr(index));
+}
+
+TEST(ObjectManager, neg_getRawPtr)
+{
+ util::ObjectManager<Index, int> man;
+ auto ptr = man.getRawPtr(Index{1});
+ ASSERT_EQ(ptr, nullptr);
+}
diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc
index 6f03cfccf..4dea6d16c 100644
--- a/runtime/onert/core/src/util/SNPEEventWriter.cc
+++ b/runtime/onert/core/src/util/SNPEEventWriter.cc
@@ -14,11 +14,12 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <unordered_map>
#include <json/json.h>
+
#include <cassert>
+#include <unordered_map>
#include <utility>
/**
diff --git a/runtime/onert/core/src/util/ShapeInference.test.cc b/runtime/onert/core/src/util/ShapeInference.test.cc
new file mode 100644
index 000000000..96579bfa2
--- /dev/null
+++ b/runtime/onert/core/src/util/ShapeInference.test.cc
@@ -0,0 +1,544 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/ShapeInference.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
+ auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, neg_Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ auto infered_out_shape =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Concat)
+{
+ {
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+ }
+ {
+ // case 1. when axis < 0
+ Shape in1{10, 20, 2};
+ Shape in2{10, 20, 3};
+
+ operation::Concat::Param param{-1};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 5);
+ }
+ {
+ // case 2. when axis < 0
+ Shape in1{2, 20, 2};
+ Shape in2{3, 20, 2};
+
+ operation::Concat::Param param{-3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 5);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 2);
+ }
+}
+
+TEST(ShapeInference, neg_Concat)
+{
+ {
+ operation::Concat::Param param{2};
+ Shape in1{10, 1, 3};
+ Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+ { // wrong rank
+ operation::Concat::Param param{2};
+ Shape in1{10, 2, 3, 4};
+ Shape in2{10, 2, 4}; // rank should be 4
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+}
+
+TEST(ShapeInference, ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ auto check = [&](int32_t axis, Shape &expected) {
+ auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
+
+ ASSERT_EQ(actual.rank(), 3);
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ { // boundary
+ int32_t axis = 0;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+ { // boundary
+ int32_t axis = 2;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // inside
+ int32_t axis = 1;
+ Shape expected{30, 1, 40};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -1;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -3;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+}
+
+TEST(ShapeInference, neg_ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ { // over boundary
+ int32_t axis = 3;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+ { // over boundary
+ int32_t axis = -4;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
+ auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
+
+TEST(ShapeInference, Transpose)
+{
+ auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
+ // pre-conditions
+ ASSERT_EQ(in_shape.rank(), perm.size());
+ ASSERT_EQ(expected.rank(), perm.size());
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ // post-conditions
+ ASSERT_EQ(inferred_out_shape.rank(), perm.size());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ {
+ ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
+ }
+ };
+ // check for 2-D
+ {
+ Shape in_shape{2, 3};
+ std::vector<int> perm = {1, 0};
+ Shape expected{3, 2};
+ // int32_t rank = 2;
+ check(in_shape, perm, expected);
+ }
+ // check for 3-D
+ {
+ Shape in_shape{1, 2, 3};
+ std::vector<int> perm = {2, 0, 1};
+ Shape expected{3, 1, 2};
+ // int32_t rank = 3;
+ check(in_shape, perm, expected);
+ }
+ // check for 4-D
+ {
+ Shape in_shape{1, 2, 3, 4};
+ std::vector<int> perm = {1, 3, 0, 2};
+ Shape expected{2, 4, 1, 3};
+ // int32_t rank = 4;
+ check(in_shape, perm, expected);
+ }
+}
+
+TEST(ShapeInference, neg_Transpose)
+{
+ Shape in_shape{1, 2, 3};
+ // Invalid parameter size
+ {
+ std::vector<int> perm = {2, 0, 1, 0};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+ // Invalid parameter value
+ {
+ std::vector<int> perm = {2, 0, 3};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+}
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 5649f286d..cf080abbc 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -65,10 +65,10 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference to model
*/
- explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr},
+ explicit BaseLoader(std::unique_ptr<ir::Model> &model)
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr},
_tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>())
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
@@ -114,7 +114,7 @@ protected:
// Get BuiltinOperator
BuiltinOperator getBuiltinOperator(const Operator *op)
{
- auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index());
+ auto const builtin_opcode = _domain_model->operator_codes()->Get(op->opcode_index());
auto builtin_op = builtin_opcode->builtin_code();
if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code());
@@ -176,7 +176,7 @@ private:
void verifySubgraphIndex(int subg_index)
{
- const auto num_subgraphs = _model->subgraphs()->size();
+ const auto num_subgraphs = _domain_model->subgraphs()->size();
if (subg_index < 0 || subg_index >= static_cast<int32_t>(num_subgraphs))
throw std::runtime_error{std::string{"Invalid subgraph index - "} +
std::to_string(subg_index)};
@@ -189,9 +189,9 @@ protected:
int32_t _pagesize;
// loaded file description
int _fd;
- // Reference on loadable subgraphs
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
- const Model *_model;
+ // Reference to ir::model (to be loaded from _domain_model)
+ std::unique_ptr<ir::Model> &_model;
+ const Model *_domain_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
@@ -290,6 +290,8 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te
case TensorType::TensorType_INT8:
return ir::DataType::QUANT_INT8_ASYMM;
// case TensorType::TensorType_FLOAT64
+ case TensorType::TensorType_UINT32:
+ return ir::DataType::UINT32;
default:
throw std::runtime_error(
std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
@@ -358,7 +360,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
const auto operand_index = subg.addOperand(shape, type_info);
// Constant tensors are indicated by non-empty data.
- const auto *data = _model->buffers()->Get(tensor->buffer())->data();
+ const auto *data = _domain_model->buffers()->Get(tensor->buffer())->data();
if (data != nullptr)
{
using std::ptrdiff_t;
@@ -1037,7 +1039,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS &&
"Unsupported custom operation options format");
- auto *op_code = _model->operator_codes()->Get(op->opcode_index());
+ auto *op_code = _domain_model->operator_codes()->Get(op->opcode_index());
auto custom_op_name = op_code->custom_code()->str();
enum class BuiltinOP
@@ -1670,7 +1672,7 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
{
LoaderDomain::VerifyModelBuffer(*_verifier.get());
- _model = LoaderDomain::GetModel(_base);
+ _domain_model = LoaderDomain::GetModel(_base);
// Version unused
// const auto version = _model->version();
// Description unused
@@ -1678,14 +1680,14 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
// Metabuffer unsued
// const auto *metadata_buffer = _model->metadata_buffer();
// Load subgraphs and map operations on subgraph
- const auto domain_subgraphs = _model->subgraphs();
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index)
+ const auto subgraphs = _domain_model->subgraphs();
+ auto model = std::make_unique<ir::Model>();
+ for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
{
- auto subg = loadSubgraph((*_model->subgraphs())[subgraph_index]);
- subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+ auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
+ model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
}
- _subgraphs = std::move(subgraphs);
+ _model = std::move(model);
}
} // namespace base_loader
diff --git a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h
index 44bf28056..87e5d70ae 100644
--- a/runtime/onert/frontend/circle/include/circle_loader.h
+++ b/runtime/onert/frontend/circle/include/circle_loader.h
@@ -25,8 +25,8 @@ namespace onert
{
namespace circle_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index aae831d61..5abcc9cd0 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -228,20 +228,20 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size)
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromBuffer(buffer, size);
- return subgraphs;
+ return model;
}
} // namespace circle_loader
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
index 56ca5ef00..19636a84d 100644
--- a/runtime/onert/frontend/nnapi/execution.cc
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- std::shared_ptr<onert::exec::ExecutorMap> executors;
+ std::shared_ptr<onert::exec::Executors> executors;
compilation->publish(executors);
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
index 63036a398..bb247b97f 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -18,11 +18,12 @@
#include "util/logging.h"
+using namespace onert;
+
// TODO Support multiple subgraphs
ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept
- : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>(
- _subgraphs.get())},
- _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}}
+ : _model{model->getModel()}, _coptions{compiler::CompilerOptions::fromGlobalConfig()},
+ _compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
{
if (model->allowedToFp16())
{
@@ -34,7 +35,7 @@ bool ANeuralNetworksCompilation::finish() noexcept
{
try
{
- _executors = _compiler->compile();
+ _artifact = _compiler->compile();
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
index bd61f9d86..dff5c6dc6 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -21,8 +21,8 @@
#include "compiler/Compiler.h"
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
-#include "exec/IExecutor.h"
+#include "ir/Model.h"
+#include "exec/Executors.h"
#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
@@ -34,23 +34,16 @@ public:
bool finish() noexcept;
onert::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<onert::exec::ExecutorMap> &executors) noexcept
+ void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept
{
- executors = _executors;
+ executors = _artifact ? _artifact->_executors : nullptr;
}
private:
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- // TODO Refine the ownership of TracingCtx
- // In case of nnfw API, nnfw_session has ownership of TracingCtx.
- // In case of nnapi, there is no concept of session and primary model might have the ownership
- // of TracingCtx.
- // Since we don't support multiple models yet with nnapi in ONE, let's implement this later
- // and let's make it work with one model for now.
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
-
+ std::shared_ptr<onert::ir::Model> _model;
+ std::unique_ptr<onert::compiler::CompilerOptions> _coptions;
std::shared_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::ExecutorMap> _executors;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _artifact;
};
#endif
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 70c5d2a4b..110c7cd55 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -26,7 +26,7 @@
struct ANeuralNetworksExecution
{
public:
- ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors)
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors)
: _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
index 81ffa26f3..a641368ec 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
@@ -273,16 +273,16 @@ void ANeuralNetworksModel::fillOptionalOperand(void)
});
}
-std::shared_ptr<onert::ir::Subgraphs> ANeuralNetworksModel::getSubGraphs() const
+std::shared_ptr<onert::ir::Model> ANeuralNetworksModel::getModel() const
{
- auto all_subgs = std::make_shared<onert::ir::Subgraphs>();
+ auto model = std::make_shared<onert::ir::Model>();
- all_subgs->push(onert::ir::SubgraphIndex{0}, _graph);
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
// TODO Find all child subgraphs and copy them to all_subgs
// Must find the same subgraph by using to compare pointer of subgraphs and set subgraph's index
// to operands of control flow operations
// Must clean all child subgraphs's pointer to prevent memory leak in case of that graph has
// subgraph itself recursively
- return all_subgs;
+ return model;
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
index 4301193d6..04f4cf0f2 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
@@ -22,7 +22,7 @@
#include <NeuralNetworksEx.h>
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
struct ANeuralNetworksModel
{
@@ -59,7 +59,7 @@ public:
size_t operandSize(uint32_t index) noexcept;
bool isUsageSet(uint32_t index) noexcept;
bool isOperationOutput(uint32_t index) noexcept;
- std::shared_ptr<onert::ir::Subgraphs> getSubGraphs() const;
+ std::shared_ptr<onert::ir::Model> getModel() const;
private:
void setOptionalOperand(const onert::ir::OperandIndex idx);
diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h
index dda34cc6a..cf17863f5 100644
--- a/runtime/onert/frontend/tflite/include/tflite_loader.h
+++ b/runtime/onert/frontend/tflite/include/tflite_loader.h
@@ -26,7 +26,7 @@ namespace onert
namespace tflite_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace tflite_loader
} // namespace onert
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 3b160473d..fe69e4e2a 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -154,12 +154,12 @@ void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TFLiteLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TFLiteLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace tflite_loader
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
index 7a0df4eaa..8d9063f6c 100644
--- a/runtime/onert/frontend/trix/CMakeLists.txt
+++ b/runtime/onert/frontend/trix/CMakeLists.txt
@@ -2,7 +2,7 @@ if (NOT BUILD_TRIX_LOADER)
return()
endif ()
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
if(TRIXEngine_FOUND)
list(APPEND SOURCES src/trix_loader.cc)
else()
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
index 297d5ec28..26d6a3c56 100644
--- a/runtime/onert/frontend/trix/include/trix_loader.h
+++ b/runtime/onert/frontend/trix/include/trix_loader.h
@@ -27,7 +27,7 @@ namespace trix_loader
/**
* @throw runtime_error when tvn path is wrong or tvn is invalid
*/
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
index e2995bbd1..cdf239648 100644
--- a/runtime/onert/frontend/trix/src/trix_loader.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader.cc
@@ -67,11 +67,11 @@ void TrixMetaReader::init(const char *path)
_meta = getNPUmodel_metadata(path, false);
if (_meta == nullptr)
{
- throw std::runtime_error("Failed to get TRIV2 model metadata");
+ throw std::runtime_error("Failed to get TRIX model metadata");
}
if (NPUBIN_VERSION(_meta->magiccode) != 3)
{
- throw std::runtime_error("TRIV2 model metadata version mismatched.");
+ throw std::runtime_error("TRIX model metadata version mismatched.");
}
}
@@ -81,9 +81,9 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference on model
*/
- explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {}
+ explicit TrixLoader(std::unique_ptr<ir::Model> &model) : _model(model) {}
/**
* @brief Load a model from file
@@ -97,7 +97,6 @@ private:
* @throw runtime_error when tvn path is wrong or tvn is invalid
*/
void loadModel();
- void loadSubgraphs();
std::unique_ptr<ir::Graph> loadSubgraph();
void loadOperands(ir::Graph &subg);
ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
@@ -112,8 +111,11 @@ private:
protected:
/** path to model (e.g. tvn) */
std::string _model_path;
+ /** original IO shapes */
+ std::vector<ir::Shape> _origin_input_shapes;
+ std::vector<ir::Shape> _origin_output_shapes;
/** Reference on loadable subgraphs */
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
+ std::unique_ptr<ir::Model> &_model;
TrixMetaReader _meta;
};
@@ -154,6 +156,8 @@ void TrixLoader::loadBulk(ir::Graph &subg)
{
ir::operation::Bulk::Param param;
param.binary_path = _model_path;
+ param.origin_input_shapes = _origin_input_shapes;
+ param.origin_output_shapes = _origin_output_shapes;
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -175,6 +179,7 @@ ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
_meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+ _origin_input_shapes.push_back(shape);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
return operand_index;
@@ -191,6 +196,7 @@ ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg
ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
_meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+ _origin_output_shapes.push_back(shape);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
return operand_index;
@@ -237,15 +243,13 @@ std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
return subg;
}
-void TrixLoader::loadSubgraphs()
+void TrixLoader::loadModel()
{
// one subgraph only
auto subg = loadSubgraph();
- _subgraphs->push(ir::SubgraphIndex(0), std::move(subg));
+ _model->push(ir::SubgraphIndex(0), std::move(subg));
}
-void TrixLoader::loadModel() { loadSubgraphs(); }
-
void TrixLoader::loadFromFile(const std::string &file_path)
{
// model path will be used to set Bulk param
@@ -255,12 +259,12 @@ void TrixLoader::loadFromFile(const std::string &file_path)
loadModel();
}
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TrixLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TrixLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
index 9fc8e1ff2..eecbd2217 100644
--- a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
@@ -22,10 +22,10 @@ namespace onert
{
namespace trix_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &)
+std::unique_ptr<ir::Model> loadModel(const std::string &)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- return subgraphs;
+ auto model = std::make_unique<ir::Model>();
+ return model;
}
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt
deleted file mode 100644
index 38899976d..000000000
--- a/runtime/onert/test/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(TEST_ONERT test_onert)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_ONERT} ${TESTS})
-
-target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
-
-target_link_libraries(${TEST_ONERT} onert_core)
-target_link_libraries(${TEST_ONERT} gtest)
-target_link_libraries(${TEST_ONERT} gtest_main)
-target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl)
-add_test(${TEST_ONERT} ${TEST_ONERT})
-
-install(TARGETS ${TEST_ONERT} DESTINATION unittest_standalone)
diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc
deleted file mode 100644
index 514c01485..000000000
--- a/runtime/onert/test/core/compiler/HEScheduler.cc
+++ /dev/null
@@ -1,573 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <compiler/HEScheduler.h>
-#include <exec/ExecTime.h>
-
-#include <ir/Shape.h>
-#include <ir/InternalType.h>
-#include <ir/TypeInfo.h>
-#include <ir/DataType.h>
-
-#include <ir/operation/BinaryArithmetic.h>
-#include <ir/operation/FullyConnected.h>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-using namespace onert;
-using namespace ir;
-using namespace backend;
-using namespace operation;
-using namespace exec;
-
-//
-// Mock backends classes
-//
-
-struct MockConfigCPU : public IConfig
-{
- std::string id() override { return "cpu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-class MockBackendContext : public BackendContext
-{
-public:
- using BackendContext::BackendContext;
- ITensorRegistry *genTensors() override { return nullptr; }
- FunctionMap genKernels() override { return {}; }
-};
-
-struct MockBackendCPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
- std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
- {
- return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
- }
-};
-
-struct MockConfigGPU : public IConfig
-{
- std::string id() override { return "gpu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendGPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
- std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
- {
- return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
- }
-};
-
-struct MockConfigNPU : public IConfig
-{
- std::string id() override { return "npu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendNPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
- std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
- {
- return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
- }
-};
-
-//
-// Constants
-//
-
-const int OPERAND_ELEMS = 268203;
-const int OPERAND_SIZE = OPERAND_ELEMS * 4;
-const int OPERATION_SIZE = OPERAND_SIZE * 3;
-
-const std::string LINEAR("Linear");
-const std::string DATAFLOW("Dataflow");
-const std::string PARALLEL("Parallel");
-
-//
-// Helper functions
-//
-
-// Set executor through environment variable
-void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
-
-// Set profiling mode through environment variable
-void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
-
-// Calculate operation size by addition sizes of all input and output operands
-uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
-{
- uint32_t size = 0;
- const auto &op = graph->operations().at(op_idx);
- for (const auto &ind : op.getInputs() + op.getOutputs())
- size += graph->operands().at(ind).info().total_size();
- return size;
-}
-
-// Set execution operation time. This method is needed since ExecutionTime has only
-// 'updateOperationExecTime' method.
-void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
- assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
-}
-
-// Set same execution time for all given backends/operations
-void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
- const std::vector<std::string> &op_names,
- const std::vector<uint32_t> &op_sizes, int64_t exec_time)
-{
- assert(op_names.size() == op_sizes.size());
- ExecTime et(backends);
- for (int i = 0; i < op_names.size(); ++i)
- {
- for (auto &backend : backends)
- setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
- }
- et.storeOperationsExecTime();
-}
-
-// Set permute time from one backend to another. This method is needed since ExecutionTime has only
-// 'updatePermuteTime' method.
-void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
- assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
-}
-
-// Set same permutation time between all given backends
-void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
- const int operand_size, const int64_t exec_time)
-{
- ExecTime et(backends);
- for (const auto &backend : backends)
- {
- for (auto &other_backend : backends)
- {
- if (backend == other_backend)
- continue;
- setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
- }
- }
- et.storeOperationsExecTime();
-}
-
-//
-// Functions for creating graphs
-//
-
-using OIS = OperandIndexSequence;
-
-template <typename NodeT, typename... Types>
-OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
-{
- auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
- auto op_idx = graph->addOperation(std::move(op));
- // For now in scheduler test all operations in tested graphs has same size (for simplicity)
- assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
- return op_idx;
-}
-
-// Create straight graph: Add->Sub->Mul
-std::shared_ptr<Graph> createStraightGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
-
- // Create sub node
- auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
-
- // Create mul node
- auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
-
- graph->verify();
- return graph;
-}
-
-/* Create branched graph:
- * [Add]
- * // \\
- * [Mul1] [FC2]
- * || ||
- * [Mul2] [FC2]
- * \\ //
- * [Sub]
- */
-std::shared_ptr<Graph> createBranchedGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
-
- // Create mul1 node
- auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
- mul1_op_params);
-
- // Create mul2 node
- auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
- mul2_op_params);
-
- // Create fc1 node
- auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- FullyConnected::Param fc1_op_params{Activation::NONE};
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
-
- // Create fc2 node
- auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- FullyConnected::Param fc2_op_params{Activation::NONE};
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
-
- // Create sub node
- auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
-
- graph->verify();
- return graph;
-}
-
-//
-// Tests setup/teardown
-//
-
-// SetUp/TearDown methods runs before/after each test and performs actions common for each test
-class HESchedulerTest : public ::testing::Test
-{
-protected:
- void SetUp() override
- {
- // Initialize mock backends
- _cpu_backend = new MockBackendCPU();
- _gpu_backend = new MockBackendGPU();
- _npu_backend = new MockBackendNPU();
- _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
-
- // Remove previous profile data if it exists
- if (!remove("exec_time.json"))
- {
- // DO NOTHING (no profile data)
- }
-
- // Remember original value of 'EXECUTOR' environment variable
- char *executor = std::getenv("EXECUTOR");
- _original_executor = executor == nullptr ? "" : executor;
-
- // Remember original value of 'PROFILING_MODE' environment variable
- char *profiling_mode = std::getenv("PROFILING_MODE");
- _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
- }
-
- void TearDown() override
- {
- delete _cpu_backend;
- delete _gpu_backend;
- delete _npu_backend;
- EXPECT_EQ(remove("exec_time.json"), 0);
- setenv("EXECUTOR", _original_executor.c_str(), true);
- setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
- }
-
- const MockBackendCPU *_cpu_backend{nullptr};
- const MockBackendGPU *_gpu_backend{nullptr};
- const MockBackendNPU *_npu_backend{nullptr};
- std::vector<const Backend *> _mock_backends;
-
- std::string _original_executor;
- std::string _original_profiling_mode;
-};
-
-//
-// HEScheduler tests
-//
-
-class HESchedulerTestWithExecutorParam : public HESchedulerTest,
- public testing::WithParamInterface<std::string>
-{
-};
-
-// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
-// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
- testing::Values(LINEAR, DATAFLOW, PARALLEL));
-
-// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
-TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
-{
- setExecutor(GetParam());
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createStraightGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: scheduler assigns different backend to each node
- {
- // For each backend reduce execution time of one node
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
- {
- // Increase transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
- }
-}
-
-// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
-TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
-{
- const int64_t NPU_ET = 5000;
- setExecutor(GetParam());
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
- // nodes, in case of parallel executor scheduler assigns different backends to branches.
- {
- // Reduce execution time
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
-
- std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
- if (GetParam() == PARALLEL)
- {
- branch1_expected_backend =
- br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
- branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
- }
-
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes
- {
- // Increase execution time for GPU backend
- ExecTime et(_mock_backends);
- /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
- * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
- * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
- * branching or scheduler assigns another backend to a node*/
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-}
-
-// Test scheduler behavior for branched graph and enabled profiling mode
-TEST_F(HESchedulerTest, branched_graph_profiling_mode)
-{
- const int ET = 1e5;
-
- // Turn on profiling mode
- setProfilingMode(true);
- setExecutor(DATAFLOW);
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Test 1
- // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
- {
- // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- }
-
- // Test 2
- // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
- // neighbor nodes
- {
- // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(mul1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(fc1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
- br->getBackend(mul2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
- br->getBackend(fc2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- }
-}
-
-// TODO: Add tests with unknown execution and permutation time
-
-} // unnamed namespace
diff --git a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc
deleted file mode 100644
index b18dedd15..000000000
--- a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include "ir/Graph.h"
-#include "compiler/pass/UnusedOperandEliminationPass.h"
-
-using namespace onert::ir;
-using namespace onert::compiler::pass;
-
-TEST(UnusedOperandEliminationPass, Simple)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- auto unused = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
-
- UnusedOperandEliminationPass{graph}.run();
-
- ASSERT_TRUE(graph.operands().exist(in));
- ASSERT_TRUE(graph.operands().exist(out));
- ASSERT_FALSE(graph.operands().exist(unused));
-}
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
deleted file mode 100644
index 0183b6276..000000000
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <thread>
-
-#include "ir/Graph.h"
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "util/TracingCtx.h"
-
-namespace
-{
-
-using namespace onert::ir;
-
-class CompiledMockUpModel
-{
-public:
- CompiledMockUpModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- graph = std::make_shared<Graph>();
- // 1st add operands (result1 <= lhs + rhs1)
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- static float rhs2_data[4] = {3, 1, -1, 5};
- auto operand_lhs = graph->addOperand(shape, type);
- auto operand_rhs1 = graph->addOperand(shape, type);
- auto operand_result1 = graph->addOperand(shape, type);
- auto operand_rhs2 = graph->addOperand(shape, type);
- auto operand_result2 = graph->addOperand(shape, type);
- graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
- // 2nd add operations (result2 <= result1 + rhs2)
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
- // Identify model inputs and outputs
- graph->addInput(operand_lhs);
- graph->addInput(operand_rhs1);
- graph->addOutput(operand_result2);
- graph->verify();
-
- // Compile
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
- onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
- executors = compiler.compile();
- }
-
-public:
- std::shared_ptr<Graph> graph;
- std::shared_ptr<onert::exec::ExecutorMap> executors;
- std::unique_ptr<onert::util::TracingCtx> tracing_ctx;
-};
-
-TEST(ExecInstance, simple)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors = mockup.executors;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- onert::exec::Execution execution{executors};
-
- execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-}
-
-TEST(ExecInstance, twoCompile)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors1 = mockup.executors;
- onert::exec::Execution execution1{executors1};
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- // Make new executor: compile again
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
- onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
- std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
- onert::exec::Execution execution2{executors2};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1.execute();
- execution2.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support two initialized execution instance then ordered execution
-TEST(ExecInstance, twoExecution)
-{
- auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- onert::exec::Execution execution1{executors};
- execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
-
- // Make new execution
- onert::exec::Execution execution2{executors};
- execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1.execute();
- execution2.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-class Inference
-{
-public:
- Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
- {
- // DO NOTHING
- }
-
- void inference(void)
- {
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- onert::exec::Execution execution{_executors};
- execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
- execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
-
- execution.execute();
- }
-
-private:
- const float (&_input1)[4];
- const float (&_input2)[4];
- float (&_output)[4];
- std::shared_ptr<onert::exec::ExecutorMap> &_executors;
-};
-
-// Support multi-thread execution
-TEST(ExecInstance, twoThreads)
-{
- auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
-
- std::thread t1{&Inference::inference, &execution1};
- std::thread t2{&Inference::inference, &execution2};
-
- t1.join();
- t2.join();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support asynchronous execution
-TEST(ExecInstance, async)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors = mockup.executors;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- onert::exec::Execution execution{executors};
-
- execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution.startExecute();
- execution.waitFinish();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-}
-
-} // namespace
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
deleted file mode 100644
index 178b61ea5..000000000
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/ExecTime.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include <gtest/gtest.h>
-#include <string>
-
-namespace
-{
-using namespace onert;
-using namespace exec;
-using namespace backend;
-
-struct MockConfig : public IConfig
-{
- std::string id() override { return "b1"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackend : public ::onert::backend::Backend
-{
- std::shared_ptr<onert::backend::IConfig> config() const override
- {
- return std::make_shared<MockConfig>();
- }
- std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
- {
- return nullptr;
- }
-};
-
-TEST(ExecTime, roundtrip_ok)
-{
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.updateOperationExecTime(b, "op1", false, 100, 888);
- et.storeOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 150);
- ASSERT_EQ(time, 150);
- time = et.getOperationExecTime(b, "op1", false, 100);
- ASSERT_EQ(time, 888);
- et.storeOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-
-TEST(ExecTime, structure)
-{
-
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.storeOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 200);
- ASSERT_EQ(time, 200);
- et.storeOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-} // unnamed namespace
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
deleted file mode 100644
index a9f7cd46a..000000000
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-#include "ir/Graph.h"
-#include "interp/InterpExecutor.h"
-#include "exec/Execution.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-namespace
-{
-
-using namespace onert::ir;
-using InterpExecutor = onert::interp::InterpExecutor;
-using Execution = onert::exec::Execution;
-using ExecutorMap = onert::exec::ExecutorMap;
-
-class InterpExecutorTest : public ::testing::Test
-{
-protected:
- virtual void SetUp() {}
- void CreateSimpleModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void CreateTwoStepModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // 1st add operands (result1 <= lhs + rhs1)
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs1 = _graph->addOperand(shape, type);
- auto operand_result1 = _graph->addOperand(shape, type);
- auto operand_rhs2 = _graph->addOperand(shape, type);
- auto operand_result2 = _graph->addOperand(shape, type);
- _graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
-
- // 2nd add operations (result2 <= result1 + rhs2)
-
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
-
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs1);
- _graph->getOutputs().append(operand_result2);
-
- _graph->verify();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void CreateUnspecifiedDimensionsModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, unknown, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 0, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
-
- auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
- _graph->operands()
- .at(operand_activation)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void createExecution() { _execution = std::make_unique<Execution>(_executors); }
-
- virtual void TearDown() { _executors = nullptr; }
-
- std::shared_ptr<Graph> _graph{nullptr};
- std::shared_ptr<ExecutorMap> _executors{nullptr};
- std::unique_ptr<Execution> _execution{nullptr};
- const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecutorTest, create_empty)
-{
- Graph graph;
- graph.verify();
- auto executor = std::make_unique<InterpExecutor>(graph);
- ASSERT_NE(executor, nullptr);
-}
-
-TEST_F(InterpExecutorTest, create_simple)
-{
- CreateSimpleModel();
- ASSERT_NE(_executors, nullptr);
- ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
-}
-
-TEST_F(InterpExecutorTest, neg_setInput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, execute)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 2);
- EXPECT_EQ(output_buffer[1], -3);
- EXPECT_EQ(output_buffer[2], 1);
- EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecutorTest, executeTwoStep)
-{
- CreateTwoStepModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 5);
- EXPECT_EQ(output_buffer[1], -2);
- EXPECT_EQ(output_buffer[2], 0);
- EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
diff --git a/runtime/onert/test/core/ir/Graph.cc b/runtime/onert/test/core/ir/Graph.cc
deleted file mode 100644
index d6de7c0cc..000000000
--- a/runtime/onert/test/core/ir/Graph.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "ir/verifier/Verifier.h"
-
-TEST(Graph, neg_inputs_and_outputs)
-{
- onert::ir::Graph graph;
-
- onert::ir::OperandIndex index0{0u};
- onert::ir::OperandIndex index1{1u};
-
- graph.addInput({index0});
- graph.addInput({index1});
-
- onert::ir::OperandIndex index10{10u};
- onert::ir::OperandIndex index11{11u};
- onert::ir::OperandIndex index12{12u};
-
- graph.addOutput({index10});
- graph.addOutput({index11});
- graph.addOutput({index12});
-
- ASSERT_EQ(graph.getInputs().size(), 2);
- ASSERT_EQ(graph.getOutputs().size(), 3);
-
- onert::ir::IOIndex io_index0{0};
- onert::ir::IOIndex io_index1{1};
- onert::ir::IOIndex io_index2{2};
-
- ASSERT_EQ(graph.getInputs().at(io_index0), 0);
- ASSERT_EQ(graph.getInputs().at(io_index1), 1);
-
- ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
- ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
- ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
-
- EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
-}
-
-using namespace onert::ir;
-
-OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
- const OperandIndexSequence outputs)
-{
- // Add "ADD" operation
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
-}
-
-TEST(Graph, OneOpGraphSimpleValid)
-{
- // Simple Graph with just one Add operation
-
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- addAddOperation(graph, {lhs, rhs}, {res});
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- graph.verify();
-
- SUCCEED();
-}
-
-TEST(Graph, neg_InvalidGraph_BadInput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addInput(OperandIndex{89}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.verify());
-}
-
-TEST(Graph, neg_InvalidGraph_BadOutput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addOutput(OperandIndex{12}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.verify());
-}
-
-TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
-}
diff --git a/runtime/onert/test/core/ir/LayoutSet.cc b/runtime/onert/test/core/ir/LayoutSet.cc
deleted file mode 100644
index 591710a4d..000000000
--- a/runtime/onert/test/core/ir/LayoutSet.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/LayoutSet.h"
-
-using onert::ir::Layout;
-using onert::ir::LayoutSet;
-
-TEST(ir_LayoutSet, neg_add_remove)
-{
- LayoutSet set{Layout::NCHW};
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 2);
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
-}
-
-TEST(ir_LayoutSet, neg_add_twice)
-{
- LayoutSet set;
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
-}
-
-TEST(ir_LayoutSet, set_operators)
-{
- LayoutSet set1{Layout::NCHW};
- LayoutSet set2{Layout::NHWC};
- LayoutSet set3 = set1 | set2;
-
- ASSERT_EQ(set3.size(), 2);
-
- ASSERT_EQ((set3 - set1).size(), 1);
- ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
- ASSERT_EQ((set3 - set2).size(), 1);
- ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 - set3).size(), 0);
-
- ASSERT_EQ((set3 & set1).size(), 1);
- ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 & set2).size(), 1);
- ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
- ASSERT_EQ((set1 & set2).size(), 0);
-}
diff --git a/runtime/onert/test/core/ir/OperandIndexSet.cc b/runtime/onert/test/core/ir/OperandIndexSet.cc
deleted file mode 100644
index c363e5472..000000000
--- a/runtime/onert/test/core/ir/OperandIndexSet.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/OperandIndexSequence.h"
-
-using onert::ir::OperandIndex;
-using onert::ir::OperandIndexSequence;
-
-TEST(ir_OperandIndexSequence, neg_append)
-{
- OperandIndexSequence iset{0, 2, 4, 8};
-
- ASSERT_EQ(iset.size(), 4);
-
- iset.append(OperandIndex{10});
-
- ASSERT_EQ(iset.size(), 5);
-
- onert::ir::IOIndex index1{1};
- onert::ir::IOIndex index2{4};
-
- ASSERT_EQ(iset.at(index1), 2);
- ASSERT_EQ(iset.at(index2), 10);
-
- ASSERT_TRUE(iset.contains(OperandIndex{2}));
- ASSERT_TRUE(iset.contains(OperandIndex{10}));
- ASSERT_FALSE(iset.contains(OperandIndex{11}));
-}
-
-TEST(graph_OperandIndexSequence, neg_replace)
-{
- OperandIndexSequence iset{0, 1, 2, 3};
-
- iset.replace(OperandIndex{1}, OperandIndex{9});
- ASSERT_FALSE(iset.contains(OperandIndex{1}));
- ASSERT_TRUE(iset.contains(OperandIndex{9}));
-}
diff --git a/runtime/onert/test/core/ir/OperandSet.cc b/runtime/onert/test/core/ir/OperandSet.cc
deleted file mode 100644
index 6cf9c8842..000000000
--- a/runtime/onert/test/core/ir/OperandSet.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operands.h"
-
-TEST(ir_Operands, neg_set_test)
-{
- onert::ir::Operands set;
-
- onert::ir::Shape shape0{1, 2, 3};
-
- onert::ir::Shape shape1(4);
- shape1.dim(0) = 10;
- shape1.dim(1) = 20;
- shape1.dim(2) = 30;
- shape1.dim(3) = 40;
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- set.emplace(shape0, type);
- set.emplace(shape1, type);
-
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
-
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
-}
diff --git a/runtime/onert/test/core/ir/OperationSet.cc b/runtime/onert/test/core/ir/OperationSet.cc
deleted file mode 100644
index 4a17eeb33..000000000
--- a/runtime/onert/test/core/ir/OperationSet.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MockNode.h"
-#include "ir/Operations.h"
-
-using onert::ir::Operation;
-using onert::ir::OperationIndex;
-using onert::ir::Operations;
-
-TEST(ir_Operations, basic)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{0u};
- ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
- ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
-}
-
-TEST(ir_Operations, neg_at)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{99u};
- EXPECT_THROW(ops.at(idx), std::out_of_range);
-}
diff --git a/runtime/onert/test/core/ir/SetIO.cc b/runtime/onert/test/core/ir/SetIO.cc
deleted file mode 100644
index 68b477347..000000000
--- a/runtime/onert/test/core/ir/SetIO.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Concat.h"
-
-#include <memory>
-
-#include <stdexcept>
-
-using Index = onert::ir::IOIndex;
-using IndexSet = onert::ir::OperandIndexSequence;
-
-TEST(ir_Operation_setIO, operation_setIO_conv)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Add Conv
- using Graph = onert::ir::operation::Conv2D;
-
- auto input_operand = graph.addOperand(shape, type);
- auto kernel_operand = graph.addOperand(shape, type);
- auto bias_operand = graph.addOperand(shape, type);
- IndexSet inputs{input_operand, kernel_operand, bias_operand};
-
- Graph::Param conv_params;
- conv_params.padding.type = onert::ir::PaddingType::SAME;
- conv_params.stride.horizontal = 1;
- conv_params.stride.vertical = 1;
- conv_params.activation = onert::ir::Activation::NONE;
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
-
- ASSERT_NE(conv, nullptr);
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- conv->setInputs({8, 9, 10});
- ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
-}
-
-TEST(ir_Operation_setIO, neg_operation_setIO_concat)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- using Graph = onert::ir::operation::Concat;
-
- // Add Concat
- IndexSet inputs;
- for (int i = 0; i < 6; ++i)
- {
- inputs.append(graph.addOperand(shape, type));
- }
-
- Graph::Param concat_params{0};
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
-
- ASSERT_NE(concat, nullptr);
- ASSERT_EQ(concat->getInputs().size(), 6);
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
-
- concat->setInputs({80, 6, 9, 11});
- ASSERT_EQ(concat->getInputs().size(), 4);
- ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
- ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
- ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
-}
diff --git a/runtime/onert/test/core/ir/Shape.cc b/runtime/onert/test/core/ir/Shape.cc
deleted file mode 100644
index c24aeda8d..000000000
--- a/runtime/onert/test/core/ir/Shape.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ir/Shape.h>
-
-#include <gtest/gtest.h>
-
-TEST(ShapeTest, basic_test)
-{
- {
- onert::ir::Shape shape(3);
-
- shape.dim(0) = 1;
- shape.dim(1) = 2;
- shape.dim(2) = 3;
-
- ASSERT_EQ(shape.rank(), 3);
- ASSERT_EQ(shape.num_elements(), 6);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
- {
- onert::ir::Shape shape; // scalar or rank is unspecified
-
- ASSERT_EQ(shape.rank(), 0);
- ASSERT_EQ(shape.num_elements(), 1);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
-}
-
-TEST(ShapeTest, neg_basic_test)
-{
- {
- onert::ir::Shape shape(2);
-
- shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
-
- ASSERT_EQ(shape.rank(), 2);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), true);
- EXPECT_ANY_THROW(shape.num_elements());
- }
-}
diff --git a/runtime/onert/test/core/ir/UseDef.cc b/runtime/onert/test/core/ir/UseDef.cc
deleted file mode 100644
index 47c98f939..000000000
--- a/runtime/onert/test/core/ir/UseDef.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "MockNode.h"
-
-#include <typeindex>
-
-namespace
-{
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-} // namespace
-
-TEST(ir_Operand, neg_usedef)
-{
- onert::ir::Graph graph;
- onert::ir::verifier::DAGChecker verifier;
-
- onert::ir::Shape shape(3);
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Model Input/Output
- auto input_operand = graph.addOperand(shape, type);
- auto output_operand = graph.addOperand(shape, type);
-
- graph.addInput(input_operand);
- graph.addOutput(output_operand);
-
- // MockNode1
- auto operand_index1 = graph.addOperand(shape, type);
- auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
-
- // MockNode2
- auto operand_index2 = graph.addOperand(shape, type);
- auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
-
- // MockNode3(two input)
- auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
-
- graph.verify();
-
- ASSERT_TRUE(verifier.verify(graph));
-
- // Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
- ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
-
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
-
- // Check use
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
- ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
-}
diff --git a/runtime/onert/test/core/ir/Verifier.cc b/runtime/onert/test/core/ir/Verifier.cc
deleted file mode 100644
index b4be2d9cd..000000000
--- a/runtime/onert/test/core/ir/Verifier.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "ir/Operand.h"
-#include "MockNode.h"
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-TEST(Verifier, dag_checker)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
-
- onert::ir::verifier::DAGChecker verifier;
-
- ASSERT_TRUE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_1)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
-
- onert::ir::verifier::EdgeChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_2)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto mock_op_ptr = mock_op.get();
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
-
- onert::ir::verifier::EdgeChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
diff --git a/runtime/onert/test/core/util/Index.cc b/runtime/onert/test/core/util/Index.cc
deleted file mode 100644
index 2d110e326..000000000
--- a/runtime/onert/test/core/util/Index.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/Index.h"
-
-using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-
-TEST(Index, neg_index_test)
-{
- Index idx1{1u};
- Index idx2{2u};
- Index idx3{idx1};
-
- ASSERT_EQ(idx1, 1);
- ASSERT_EQ(idx1, 1u);
- ASSERT_EQ(idx1.value(), 1u);
- ASSERT_NE(idx1, idx2);
- ASSERT_EQ(idx1, idx3);
-}
diff --git a/runtime/onert/test/core/util/ObjectManager.cc b/runtime/onert/test/core/util/ObjectManager.cc
deleted file mode 100644
index 78f044e56..000000000
--- a/runtime/onert/test/core/util/ObjectManager.cc
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/ObjectManager.h"
-#include "util/Index.h"
-
-using namespace onert;
-
-struct TestTag;
-using Index = typename util::Index<uint32_t, TestTag>;
-
-TEST(ObjectManager, emplace)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.emplace(100);
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, neg_remove_1)
-{
- util::ObjectManager<Index, int> man;
-
- Index index = man.emplace(100);
- ASSERT_TRUE(man.exist(index));
- ASSERT_EQ(man.at(index), 100);
-
- man.remove(index);
- ASSERT_FALSE(man.exist(index));
-}
-
-TEST(ObjectManager, neg_remove_2)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- ASSERT_TRUE(man.exist(index0));
- ASSERT_EQ(man.at(index0), 100);
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-
- man.remove(index0);
- ASSERT_FALSE(man.exist(index0));
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-}
-
-TEST(ObjectManager, push)
-{
- util::ObjectManager<Index, int> man;
-
- // Not specify index
- auto index = man.push(std::make_unique<int>(100));
- ASSERT_EQ(man.at(index), 100);
-
- // Specify index
- auto index2 = man.push(std::make_unique<int>(200), Index{33});
- ASSERT_EQ(index2.value(), 33);
- ASSERT_EQ(man.at(index2), 200);
-
- auto index3 = man.push(std::make_unique<int>(300));
- // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
- ASSERT_EQ(index3.value(), 34);
- ASSERT_EQ(man.at(index3), 300);
-
- auto index4 = man.push(std::make_unique<int>(400), Index{22});
- ASSERT_EQ(index4.value(), 22);
- ASSERT_EQ(man.at(index4), 400);
-
- auto index5 = man.push(std::make_unique<int>(500));
- // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
- ASSERT_EQ(index5.value(), 35);
- ASSERT_EQ(man.at(index5), 500);
-}
-
-TEST(ObjectManager, neg_push)
-{
- util::ObjectManager<Index, int> man;
-
- // Specify index
- auto index = man.push(std::make_unique<int>(100), Index{55});
- ASSERT_EQ(index.value(), 55);
- ASSERT_EQ(man.at(index), 100);
-
- // Specify the same index
- auto index2 = man.push(std::make_unique<int>(200), Index{55});
- ASSERT_FALSE(index2.valid());
-}
-
-static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
-
-TEST(ObjectManager, neg_push_undefined_index)
-{
- util::ObjectManager<Index, int> man;
-
- // Try inserting invalid(undefined) index
- auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
- ASSERT_FALSE(index.valid());
- ASSERT_EQ(man.size(), 0);
-}
-
-TEST(ObjectManager, neg_push_max_index)
-{
- util::ObjectManager<Index, int> man;
-
- // Insert an object with maximum valid index
- auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
- ASSERT_EQ(index.value(), kMaxUInt32 - 1);
- ASSERT_EQ(man.at(index), 100);
- ASSERT_EQ(man.size(), 1);
-
- // Reached to the final index so next push/emplace must fail
- auto index2 = man.push(std::make_unique<int>(200));
- ASSERT_EQ(man.size(), 1);
- ASSERT_FALSE(index2.valid());
-}
-
-TEST(ObjectManager, neg_emplace_max_index)
-{
- util::ObjectManager<Index, int> man;
-
- // Insert an object with maximum valid index
- auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
- ASSERT_EQ(index.value(), kMaxUInt32 - 1);
- ASSERT_EQ(man.at(index), 100);
- ASSERT_EQ(man.size(), 1);
-
- // Reached to the final index so next push/emplace must fail
- auto index3 = man.emplace(200);
- ASSERT_EQ(man.size(), 1);
- ASSERT_FALSE(index3.valid());
-}
-
-TEST(ObjectManager, const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- int sum = 0;
- man.iterate([&](const Index &index, const int &val) { sum += val; });
- ASSERT_EQ(sum, 600);
-}
-
-TEST(ObjectManager, non_const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- man.iterate([&](const Index &index, int &val) { val += 1; });
- ASSERT_EQ(man.at(index0), 101);
- ASSERT_EQ(man.at(index1), 201);
- ASSERT_EQ(man.at(index2), 301);
-}
-
-TEST(ObjectManager, set)
-{
- util::ObjectManager<Index, int> man;
- auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
- ASSERT_EQ(index, Index{1});
- auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
- ASSERT_EQ(index2, index);
- ASSERT_EQ(man.at(index2), 200);
-}
-
-TEST(ObjectManager, neg_set)
-{
- auto v = std::make_unique<int>(100);
- util::ObjectManager<Index, int> man;
- auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index
- ASSERT_EQ(index, Index{});
- ASSERT_FALSE(index.valid());
- ASSERT_NE(v, nullptr); // v must be kept when failure
-}
-
-TEST(ObjectManager, getRawPtr)
-{
- auto v = std::make_unique<int>(100);
- auto v_ptr = v.get();
- util::ObjectManager<Index, int> man;
- auto index = man.push(std::move(v));
- ASSERT_EQ(v_ptr, man.getRawPtr(index));
-}
-
-TEST(ObjectManager, neg_getRawPtr)
-{
- util::ObjectManager<Index, int> man;
- auto ptr = man.getRawPtr(Index{1});
- ASSERT_EQ(ptr, nullptr);
-}
diff --git a/runtime/onert/test/core/util/ShapeInference.cc b/runtime/onert/test/core/util/ShapeInference.cc
deleted file mode 100644
index 2ecaa2885..000000000
--- a/runtime/onert/test/core/util/ShapeInference.cc
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
-#include "util/ShapeInference.h"
-
-using namespace onert::ir;
-
-TEST(ShapeInference, Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{3};
- auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.dim(0), 1);
- ASSERT_EQ(infered_out_shape.dim(1), 299);
- ASSERT_EQ(infered_out_shape.dim(2), 299);
- ASSERT_EQ(infered_out_shape.dim(3), 3);
-}
-
-TEST(ShapeInference, neg_Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{5, 3};
- ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
-}
-
-TEST(ShapeInference, Pool2DNodeSame)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeValid)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::VALID};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeExplicit)
-{
- Shape in_shape{10, 3, 5, 20};
-
- Stride stride{3, 7};
- Padding padding{4, 3, 2, 1};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{0, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Conv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-}
-
-TEST(ShapeInference, neg_Conv2D_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, DepthwiseConv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
- Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-}
-
-TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Concat)
-{
- {
- Shape in1{10, 20, 30, 3, 50};
- Shape in2{10, 20, 30, 2, 50};
- Shape in3{10, 20, 30, 2, 50};
-
- operation::Concat::Param param{3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 5);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 30);
- ASSERT_EQ(infered_out_shape.dim(3), 7);
- ASSERT_EQ(infered_out_shape.dim(4), 50);
- }
- {
- // case 1. when axis < 0
- Shape in1{10, 20, 2};
- Shape in2{10, 20, 3};
-
- operation::Concat::Param param{-1};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 5);
- }
- {
- // case 2. when axis < 0
- Shape in1{2, 20, 2};
- Shape in2{3, 20, 2};
-
- operation::Concat::Param param{-3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 5);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 2);
- }
-}
-
-TEST(ShapeInference, neg_Concat)
-{
- {
- operation::Concat::Param param{2};
- Shape in1{10, 1, 3};
- Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
- { // wrong rank
- operation::Concat::Param param{2};
- Shape in1{10, 2, 3, 4};
- Shape in2{10, 2, 4}; // rank should be 4
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
-}
-
-TEST(ShapeInference, ExpandDims)
-{
- Shape in_shape{30, 40};
-
- auto check = [&](int32_t axis, Shape &expected) {
- auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
-
- ASSERT_EQ(actual.rank(), 3);
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- { // boundary
- int32_t axis = 0;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
- { // boundary
- int32_t axis = 2;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // inside
- int32_t axis = 1;
- Shape expected{30, 1, 40};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -1;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -3;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
-}
-
-TEST(ShapeInference, neg_ExpandDims)
-{
- Shape in_shape{30, 40};
-
- { // over boundary
- int32_t axis = 3;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
- { // over boundary
- int32_t axis = -4;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
-}
-
-TEST(ShapeInference, FullyConnected)
-{
- Shape in_shape{3, 4, 5, 6};
- Shape ker_shape{3, 10};
- auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 2);
- ASSERT_EQ(infered_out_shape.dim(0), 36);
- ASSERT_EQ(infered_out_shape.dim(1), 3);
-}
-
-TEST(ShapeInference, Transpose)
-{
- auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
- // pre-conditions
- ASSERT_EQ(in_shape.rank(), perm.size());
- ASSERT_EQ(expected.rank(), perm.size());
- auto inferred_out_shape =
- onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
- // post-conditions
- ASSERT_EQ(inferred_out_shape.rank(), perm.size());
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- {
- ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
- }
- };
- // check for 2-D
- {
- Shape in_shape{2, 3};
- std::vector<int> perm = {1, 0};
- Shape expected{3, 2};
- // int32_t rank = 2;
- check(in_shape, perm, expected);
- }
- // check for 3-D
- {
- Shape in_shape{1, 2, 3};
- std::vector<int> perm = {2, 0, 1};
- Shape expected{3, 1, 2};
- // int32_t rank = 3;
- check(in_shape, perm, expected);
- }
- // check for 4-D
- {
- Shape in_shape{1, 2, 3, 4};
- std::vector<int> perm = {1, 3, 0, 2};
- Shape expected{2, 4, 1, 3};
- // int32_t rank = 4;
- check(in_shape, perm, expected);
- }
-}
-
-TEST(ShapeInference, neg_Transpose)
-{
- Shape in_shape{1, 2, 3};
- // Invalid parameter size
- {
- std::vector<int> perm = {2, 0, 1, 0};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
- // Invalid parameter value
- {
- std::vector<int> perm = {2, 0, 3};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
-}
-
-TEST(ShapeInference, Gather)
-{
- auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
- int rank = input.rank();
- auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
-
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- // check for 2-D, 3-D, axis 0
- {
- Shape input{3, 4};
- Shape indices{1, 1, 2};
- int32_t axis = 0;
- Shape expected{1, 1, 2, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 2-D, 3-D, axis 1
- {
- Shape input{3, 4};
- Shape indices{1, 2, 1};
- int32_t axis = 1;
- Shape expected{3, 1, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 0
- {
- Shape input{2, 3, 4};
- Shape indices{1, 2};
- int32_t axis = 0;
- Shape expected{1, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 2
- {
- Shape input{2, 3, 4};
- Shape indices{2, 1};
- int32_t axis = 2;
- Shape expected{2, 3, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 4D, axis 0
- {
- Shape input{1, 2, 3, 4};
- Shape indices{2};
- int32_t axis = 0;
- Shape expected{2, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-}
-
-TEST(ShapeInference, BCQFullyConnected)
-{
- auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
- Shape &expected) {
- auto actual =
- onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape in_shape{10, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
-
- Shape expected{30, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-
- {
- Shape in_shape{1, 1};
- Shape cluster_shape{1, 2};
- std::vector<int> cluster = {3, 50};
-
- Shape expected{50, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-}
-
-TEST(ShapeInference, BCQGather)
-{
- auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
- uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
- operation::BCQGather::Param param{hidden_size, axis};
- auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
- cluster.data(), rank, param);
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 0;
- int rank = 2;
-
- Shape expected{5, 1, 10};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 1;
- int rank = 2;
-
- Shape expected{30, 5, 1};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-}
diff --git a/runtime/service/CMakeLists.txt b/runtime/service/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/runtime/service/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/runtime/service/npud/CMakeLists.txt b/runtime/service/npud/CMakeLists.txt
new file mode 100644
index 000000000..8cf51649c
--- /dev/null
+++ b/runtime/service/npud/CMakeLists.txt
@@ -0,0 +1,21 @@
+if(NOT BUILD_NPUD)
+ return()
+endif(NOT BUILD_NPUD)
+
+nnfw_find_package(GLib2.0 REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_executable(npud ${SOURCES})
+set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(npud PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_link_libraries(npud PRIVATE nnfw_lib_misc)
+target_link_libraries(npud PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud PRIVATE ${LIB_PTHREAD})
+
+if(ENVVAR_NPUD_CONFIG)
+ target_compile_definitions(npud PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+install(TARGETS npud DESTINATION bin)
diff --git a/runtime/service/npud/core/Server.cc b/runtime/service/npud/core/Server.cc
new file mode 100644
index 000000000..5b15388dc
--- /dev/null
+++ b/runtime/service/npud/core/Server.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+
+#include <thread>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+std::atomic_bool Server::_isRunning(false);
+
+Server::Server() noexcept
+ : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>())
+{
+}
+
+void Server::run(void)
+{
+ VERBOSE(Server) << "Starting Server\n";
+
+ if (_isRunning.exchange(true))
+ {
+ throw std::runtime_error("Mainloop is already running.");
+ }
+
+ g_main_loop_run(_mainloop.get());
+}
+
+void Server::stop(void)
+{
+ VERBOSE(Server) << "Stop Server\n";
+
+ if (!_isRunning.load())
+ {
+ throw std::runtime_error("Mainloop is not running");
+ }
+
+ while (!g_main_loop_is_running(_mainloop.get()))
+ {
+ std::this_thread::yield();
+ }
+
+ g_main_loop_quit(_mainloop.get());
+ _isRunning = false;
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Server.h b/runtime/service/npud/core/Server.h
new file mode 100644
index 000000000..e2f37f8fe
--- /dev/null
+++ b/runtime/service/npud/core/Server.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SERVER_H__
+#define __ONE_SERVICE_NPUD_CORE_SERVER_H__
+
+#include "Signal.h"
+
+#include <glib.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+class Server
+{
+public:
+ void run(void);
+ void stop(void);
+
+ static Server &instance(void)
+ {
+ static Server server;
+ return server;
+ }
+
+private:
+ Server() noexcept;
+
+ static std::atomic_bool _isRunning;
+
+ std::unique_ptr<GMainLoop, void (*)(GMainLoop *)> _mainloop;
+ std::unique_ptr<Signal> _signal;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SERVER_H__
diff --git a/runtime/service/npud/core/Signal.cc b/runtime/service/npud/core/Signal.cc
new file mode 100644
index 000000000..085535a6a
--- /dev/null
+++ b/runtime/service/npud/core/Signal.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Signal.h"
+
+#include "Server.h"
+#include <util/Logging.h>
+
+#include <csignal>
+
+namespace npud
+{
+namespace core
+{
+
+Signal::Signal(void) noexcept { init(); }
+
+void Signal::init(void)
+{
+ // NOTE Types of signals
+ // SIGTERM: termination request, sent to the program
+ // SIGSEGV: invalid memory access (segmentation fault)
+ // SIGINT: external interrupt, usually initiated by the user
+ // SIGILL: invalid program image, such as invalid instruction
+ // SIGABRT: abnormal termination condition, as is e.g. initiated by std::abort()
+ // SIGFPE: erroneous arithmetic operation such as divide by zero
+ // from https://en.cppreference.com/w/cpp/utility/program/SIG_types
+ std::signal(SIGTERM, handleSignal);
+ std::signal(SIGSEGV, handleSignal);
+ std::signal(SIGINT, handleSignal);
+ std::signal(SIGILL, handleSignal);
+ std::signal(SIGABRT, handleSignal);
+ std::signal(SIGFPE, handleSignal);
+}
+
+void Signal::handleSignal(int signum)
+{
+ VERBOSE(signal) << "Signal received: " << strsignal(signum) << "(" << signum << ")\n";
+ Server::instance().stop();
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Signal.h b/runtime/service/npud/core/Signal.h
new file mode 100644
index 000000000..ffddc7255
--- /dev/null
+++ b/runtime/service/npud/core/Signal.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+#define __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+
+namespace npud
+{
+namespace core
+{
+
+class Signal
+{
+public:
+ Signal() noexcept;
+
+ void init(void);
+ static void handleSignal(int signum);
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
diff --git a/runtime/service/npud/core/main.cc b/runtime/service/npud/core/main.cc
new file mode 100644
index 000000000..bd885b207
--- /dev/null
+++ b/runtime/service/npud/core/main.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+
+#include <util/Logging.h>
+
+using namespace npud;
+
+int main(int argc, const char *argv[])
+{
+ auto &server = core::Server::instance();
+
+ VERBOSE(main) << "Starting npud\n";
+ try
+ {
+ server.run();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return 1;
+ }
+
+ VERBOSE(main) << "Finished npud\n";
+ return 0;
+}
diff --git a/runtime/service/npud/util/Config.lst b/runtime/service/npud/util/Config.lst
new file mode 100644
index 000000000..d45b37352
--- /dev/null
+++ b/runtime/service/npud/util/Config.lst
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONFIG
+#error Define CONFIG before including this file
+#endif
+
+// Name | Type | Default
+CONFIG(NPUD_LOG_ENABLE , bool , "0")
diff --git a/runtime/service/npud/util/ConfigSource.cc b/runtime/service/npud/util/ConfigSource.cc
new file mode 100644
index 000000000..7a14b0200
--- /dev/null
+++ b/runtime/service/npud/util/ConfigSource.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConfigSource.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <memory>
+
+namespace npud
+{
+namespace util
+{
+
+using namespace nnfw::misc;
+
+static std::unique_ptr<IConfigSource> _source;
+
+void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+
+static IConfigSource *config_source()
+{
+ if (!_source)
+ {
+#ifdef ENVVAR_FOR_DEFAULT_CONFIG
+ // Default ConfigSource is EnvConfigSource
+ _source = std::make_unique<EnvConfigSource>();
+#else
+ _source = std::make_unique<GeneralConfigSource>();
+#endif // ENVVAR_FOR_DEFAULT_CONFIG
+ }
+ return _source.get();
+}
+
+static std::string getConfigOrDefault(const std::string &key)
+{
+ static std::unordered_map<std::string, std::string> defaults;
+ if (defaults.empty())
+ {
+#define CONFIG(Name, Type, Default) \
+ { \
+ auto name = std::string{#Name}; \
+ defaults.emplace(name, std::string{Default}); \
+ }
+
+#include "Config.lst"
+
+#undef CONFIG
+ }
+
+ // Treat empty string and absence of the value to be the same
+ auto ret = config_source()->get(key);
+ // if not found search from defaults
+ if (ret.empty())
+ {
+ auto itr = defaults.find(key);
+ if (itr != defaults.end())
+ {
+ // Return the default value if exists
+ ret = itr->second;
+ }
+ }
+
+ return ret;
+}
+
+bool toBool(const std::string &val)
+{
+ static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
+ auto false_found = std::find(false_list.begin(), false_list.end(), val);
+ return false_found == false_list.end();
+}
+
+int toInt(const std::string &val) { return std::stoi(val); }
+
+bool getConfigBool(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toBool(raw);
+}
+
+int getConfigInt(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toInt(raw);
+}
+
+std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) const char *Name = #Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
diff --git a/runtime/service/npud/util/ConfigSource.h b/runtime/service/npud/util/ConfigSource.h
new file mode 100644
index 000000000..f4ecc79a5
--- /dev/null
+++ b/runtime/service/npud/util/ConfigSource.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+#define __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace npud
+{
+namespace util
+{
+
+bool getConfigBool(const std::string &key);
+int getConfigInt(const std::string &key);
+std::string getConfigString(const std::string &key);
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) extern const char *Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
diff --git a/runtime/service/npud/util/Logging.h b/runtime/service/npud/util/Logging.h
new file mode 100644
index 000000000..0b75b3966
--- /dev/null
+++ b/runtime/service/npud/util/Logging.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+#define __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+
+#include <iostream>
+#include <cstring>
+
+#include "ConfigSource.h"
+
+namespace npud
+{
+namespace util
+{
+namespace logging
+{
+class Context
+{
+public:
+ Context() noexcept : _enabled{false}
+ {
+ const auto env = util::getConfigBool(util::config::NPUD_LOG_ENABLE);
+
+ if (env)
+ {
+ _enabled = true;
+ }
+ }
+
+ static Context &get() noexcept
+ {
+ static Context ctx;
+ return ctx;
+ }
+
+public:
+ bool enabled(void) const { return _enabled; }
+
+private:
+ bool _enabled;
+};
+
+static Context &ctx = Context::get();
+
+inline std::string decorated_name(const char *input)
+{
+ const int min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+} // namespace logging
+} // namespace util
+} // namespace npud
+
+#define VERBOSE(name) \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(#name)
+
+#define VERBOSE_F() \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(__func__)
+
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::npud::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
+
+#endif // __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
diff --git a/tests/nnapi/CMakeLists.txt b/tests/nnapi/CMakeLists.txt
index 67ac90f15..c1fa308a1 100644
--- a/tests/nnapi/CMakeLists.txt
+++ b/tests/nnapi/CMakeLists.txt
@@ -7,11 +7,6 @@ if (NOT BUILD_ONERT)
return()
endif(NOT BUILD_ONERT)
-# GCC Compiler under 6.2 is not support this test build
-if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2)
- return()
-endif()
-
if (ANDROID_BOOST_ROOT)
set(BOOST_ROOT ${ANDROID_BOOST_ROOT})
endif (ANDROID_BOOST_ROOT)
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
index e4e4ba1af..4f1c7f9f5 100644
--- a/tests/nnfw_api/src/CircleGen.cc
+++ b/tests/nnfw_api/src/CircleGen.cc
@@ -269,6 +269,20 @@ uint32_t CircleGen::addOperatorFloorDiv(const OperatorParams &params)
circle::BuiltinOptions_NONE, 0);
}
+uint32_t CircleGen::addOperatorGreater(const OperatorParams &params)
+{
+ auto options = circle::CreateLessOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER,
+ circle::BuiltinOptions_GreaterOptions, options);
+}
+
+uint32_t CircleGen::addOperatorGreaterEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateGreaterOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER_EQUAL,
+ circle::BuiltinOptions_GreaterEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams &params)
{
auto options = circle::CreateL2NormOptions(_fbb).Union();
@@ -283,6 +297,13 @@ uint32_t CircleGen::addOperatorLess(const OperatorParams &params)
circle::BuiltinOptions_LessOptions, options);
}
+uint32_t CircleGen::addOperatorLessEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateLessOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LESS_EQUAL,
+ circle::BuiltinOptions_LessEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams &params, float alpha)
{
auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union();
@@ -319,6 +340,13 @@ uint32_t CircleGen::addOperatorNeg(const OperatorParams &params)
circle::BuiltinOptions_NegOptions, options);
}
+uint32_t CircleGen::addOperatorNotEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateEqualOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_NOT_EQUAL,
+ circle::BuiltinOptions_NotEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorOneHot(const OperatorParams &params, int32_t axis)
{
auto options = circle::CreateOneHotOptions(_fbb, axis).Union();
diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h
index 062a8d35a..d780eb1bb 100644
--- a/tests/nnfw_api/src/CircleGen.h
+++ b/tests/nnfw_api/src/CircleGen.h
@@ -174,16 +174,20 @@ public:
uint32_t addOperatorFullyConnected(const OperatorParams &params,
circle::FullyConnectedOptionsWeightsFormat weights_format =
circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
+ uint32_t addOperatorGreater(const OperatorParams &params);
+ uint32_t addOperatorGreaterEqual(const OperatorParams &params);
uint32_t addOperatorIf(const OperatorParams &params, uint32_t then_subg, uint32_t else_subg);
uint32_t addOperatorInstanceNorm(const OperatorParams &params, float epsilon,
circle::ActivationFunctionType actfn);
uint32_t addOperatorL2Normalization(const OperatorParams &params);
uint32_t addOperatorLeakyRelu(const OperatorParams &params, float alpha);
uint32_t addOperatorLess(const OperatorParams &params);
+ uint32_t addOperatorLessEqual(const OperatorParams &params);
uint32_t addOperatorLogSoftmax(const OperatorParams &params);
uint32_t addOperatorMul(const OperatorParams &params, circle::ActivationFunctionType actfn);
uint32_t addOperatorMean(const OperatorParams &params, bool keep_dims);
uint32_t addOperatorNeg(const OperatorParams &params);
+ uint32_t addOperatorNotEqual(const OperatorParams &params);
uint32_t addOperatorOneHot(const OperatorParams &params, int32_t axis);
uint32_t addOperatorPad(const OperatorParams &params);
uint32_t addOperatorPadV2(const OperatorParams &params);
diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h
index eee50d112..90b7cfcad 100644
--- a/tests/nnfw_api/src/GenModelTest.h
+++ b/tests/nnfw_api/src/GenModelTest.h
@@ -398,7 +398,9 @@ protected:
// Check output tensor values
auto &ref_output = ref_outputs[i];
auto &output = _so.outputs[i];
- ASSERT_EQ(output.size(), ref_output.size());
+ auto expected_tensor_size = ref_output.size();
+ auto actual_tensor_size = output.size();
+ ASSERT_EQ(expected_tensor_size, actual_tensor_size) << "Output #" << i;
switch (ti.dtype)
{
@@ -419,9 +421,10 @@ protected:
// TODO better way for handling FP error?
for (uint32_t e = 0; e < ref_output.size() / sizeof(float); e++)
{
- float refval = reinterpret_cast<const float *>(ref_output.data())[e];
- float val = reinterpret_cast<const float *>(output.data())[e];
- EXPECT_NEAR(refval, val, 0.001) << "Output #" << i << ", Element Index : " << e;
+ float expected = reinterpret_cast<const float *>(ref_output.data())[e];
+ float actual = reinterpret_cast<const float *>(output.data())[e];
+ EXPECT_NEAR(expected, actual, 0.001)
+ << "Output #" << i << ", Element Index : " << e;
}
break;
case NNFW_TYPE_TENSOR_INT64:
@@ -445,9 +448,9 @@ private:
{
for (uint32_t e = 0; e < ref_buf.size() / sizeof(T); e++)
{
- T ref = reinterpret_cast<const T *>(ref_buf.data())[e];
- T act = reinterpret_cast<const T *>(act_buf.data())[e];
- EXPECT_EQ(ref, act) << "Output #" << index << ", Element Index : " << e;
+ T expected = reinterpret_cast<const T *>(ref_buf.data())[e];
+ T actual = reinterpret_cast<const T *>(act_buf.data())[e];
+ EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e;
}
}
@@ -457,10 +460,10 @@ private:
for (uint32_t e = 0; e < ref_buf.size() / sizeof(uint8_t); e++)
{
uint8_t ref_raw = reinterpret_cast<const uint8_t *>(ref_buf.data())[e];
- bool ref = (ref_raw != 0 ? true : false);
+ bool expected = (ref_raw != 0 ? true : false);
uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
- bool act = (act_raw != 0 ? true : false);
- EXPECT_EQ(ref, act) << "Output #" << index << ", Element Index : " << e;
+ bool actual = (act_raw != 0 ? true : false);
+ EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e;
}
}
diff --git a/tests/nnfw_api/src/GenModelTests.cc b/tests/nnfw_api/src/GenModelTests.test.cc
index 53a3571db..53a3571db 100644
--- a/tests/nnfw_api/src/GenModelTests.cc
+++ b/tests/nnfw_api/src/GenModelTests.test.cc
diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc
index 1ed8f9581..1ed8f9581 100644
--- a/tests/nnfw_api/src/ModelTestDynamicTensor.cc
+++ b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc
diff --git a/tests/nnfw_api/src/ModelTestInputReshaping.cc b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc
index f5ce3e062..f5ce3e062 100644
--- a/tests/nnfw_api/src/ModelTestInputReshaping.cc
+++ b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc
diff --git a/tests/nnfw_api/src/RegressionTests.cc b/tests/nnfw_api/src/RegressionTests.test.cc
index de233390d..de233390d 100644
--- a/tests/nnfw_api/src/RegressionTests.cc
+++ b/tests/nnfw_api/src/RegressionTests.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
index 4c482369f..4c482369f 100644
--- a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
+++ b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc
index d668a1cb0..d668a1cb0 100644
--- a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc
+++ b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc
index e09402b01..e09402b01 100644
--- a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
+++ b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestMultipleSessions.cc b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc
index ef00dc6bd..ef00dc6bd 100644
--- a/tests/nnfw_api/src/ValidationTestMultipleSessions.cc
+++ b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestPipelineSession.cc b/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc
index 1d92095ed..1d92095ed 100644
--- a/tests/nnfw_api/src/ValidationTestPipelineSession.cc
+++ b/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc
index cb0791933..cb0791933 100644
--- a/tests/nnfw_api/src/ValidationTestSessionCreated.cc
+++ b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.cc b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc
index 852d5cd21..852d5cd21 100644
--- a/tests/nnfw_api/src/ValidationTestSingleSession.cc
+++ b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Add.cc b/tests/nnfw_api/src/one_op_tests/Add.cc
deleted file mode 100644
index c21022972..000000000
--- a/tests/nnfw_api/src/one_op_tests/Add.cc
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_Add_VarToConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 7, 4};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{6, 7, 9, 8}}));
- _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}}, {{5, 5, 9, 7}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVar)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarUint8)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<uint8_t>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
- int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2, 6, 4, 8}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarSize0)
-{
- CircleGen cgen;
- int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({a, b, c}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{}, {}, {}}, {{}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidType)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_ThreeOperands)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_NoOutput)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}},
- static_cast<circle::ActivationFunctionType>(128) /* Invalid value*/);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape)
-{
- CircleGen cgen;
- int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
- int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({a, b, c}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailCompile();
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, net_OneOp_Add_VarToVarInt16)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Add.test.cc b/tests/nnfw_api/src/one_op_tests/Add.test.cc
new file mode 100644
index 000000000..9fc0e86b6
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Add.test.cc
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{6, 7, 9, 8}}));
+ _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}}, {{5, 5, 9, 7}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarUint8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2, 6, 4, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarSize0)
+{
+ CircleGen cgen;
+ int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({a, b, c}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{}, {}, {}}, {{}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_NoOutput)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}},
+ static_cast<circle::ActivationFunctionType>(128) /* Invalid value*/);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape)
+{
+ CircleGen cgen;
+ int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({a, b, c}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarInt16)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AddN.cc b/tests/nnfw_api/src/one_op_tests/AddN.test.cc
index 73fa82168..73fa82168 100644
--- a/tests/nnfw_api/src/one_op_tests/AddN.cc
+++ b/tests/nnfw_api/src/one_op_tests/AddN.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
deleted file mode 100644
index dda098698..000000000
--- a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-struct ArgMinMaxVariationParam
-{
- TestCaseData tcd;
- bool is_argmax = true;
- circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class ArgMinMaxVariation : public GenModelTest,
- public ::testing::WithParamInterface<ArgMinMaxVariationParam>
-{
-};
-
-// Input shape: {1, 2, 2, 1}
-// Reduce axis: 1
-// Output shape: {1, 2, 1}
-// Output type: Int32
-// Test with different input type and value
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, ArgMinMaxVariation,
- ::testing::Values(
- // ArgMax, float input
- ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}),
- true},
- // ArgMax, int32 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
- circle::TensorType::TensorType_INT32},
- // ArgMax, uint8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
- circle::TensorType::TensorType_UINT8, 1.0, 1},
- // ArgMax, int8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
- circle::TensorType::TensorType_INT8, 1.0, 1},
- // ArgMin, float input
- ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}),
- false},
- // ArgMin, int32 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
- circle::TensorType::TensorType_INT32},
- // ArgMin, uint8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
- circle::TensorType::TensorType_UINT8, 1.0, 1},
- // ArgMin, int8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
- circle::TensorType::TensorType_INT8, 1.0, 1}));
-
-TEST_P(ArgMinMaxVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT64;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
- _context->setBackends({"acl_cl", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in, axis}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<float>({1, 4, 2, 3})
- .addInput<int32_t>({-3})
- .addOutput<int32_t>({1, 0}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailCompile();
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{-3};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{2}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_AxisType)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<float> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_FLOAT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_paramType)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- const auto output_param = circle::TensorType::TensorType_INT64;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc
new file mode 100644
index 000000000..1321552db
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ArgMinMaxVariationParam
+{
+ TestCaseData tcd;
+ bool is_argmax = true;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ArgMinMaxVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ArgMinMaxVariationParam>
+{
+};
+
+// Input shape: {1, 2, 2, 1}
+// Reduce axis: 1
+// Output shape: {1, 2, 1}
+// Output type: Int32
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ArgMinMaxVariation,
+ ::testing::Values(
+ // ArgMax, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}),
+ true},
+ // ArgMax, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT32},
+ // ArgMax, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMax, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT8, 1.0, 1},
+ // ArgMin, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}),
+ false},
+ // ArgMin, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT32},
+ // ArgMin, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMin, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT8, 1.0, 1}));
+
+TEST_P(ArgMinMaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
+ _context->setBackends({"acl_cl", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in, axis}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 4, 2, 3})
+ .addInput<int32_t>({-3})
+ .addOutput<int32_t>({1, 0}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{-3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{2}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_AxisType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<float> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_FLOAT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_paramType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ const auto output_param = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
deleted file mode 100644
index 15ddac210..000000000
--- a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct AvgPool2DParam
-{
- TestCaseData tcd;
- std::vector<int32_t> input_shape;
- std::vector<int32_t> output_shape;
- struct filter_stride
- {
- int32_t filter_w;
- int32_t filter_h;
- int32_t stride_w;
- int32_t stride_h;
- } param = {1, 1, 1, 1};
- struct data_type
- {
- circle::TensorType data_type;
- float scale;
- int64_t zero_point;
- } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
- std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"};
-};
-
-class AveragePool2DVariation : public GenModelTest,
- public ::testing::WithParamInterface<AvgPool2DParam>
-{
-};
-
-// Test with different input type and value
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, AveragePool2DVariation,
- ::testing::Values(
- // float data
- AvgPool2DParam{
- uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
- // float data - large
- AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
- {1, 18, 36, 2},
- {1, 1, 2, 2},
- {18, 18, 18, 18}},
- // uint8_t data
- AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
- {1, 2, 2, 1},
- {1, 1, 1, 1},
- {2, 2, 2, 2},
- {circle::TensorType::TensorType_UINT8, 1.2, 3},
- {"acl_cl", "acl_neon", "cpu"}},
- // uint8_t data -large
- AvgPool2DParam{
- uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
- {1, 18, 36, 2},
- {1, 1, 2, 2},
- {18, 18, 18, 18},
- {circle::TensorType::TensorType_UINT8, 1.2, 3},
- {"acl_cl", "acl_neon", "cpu"}},
- // int8_t data
- // TODO enable acl-cl, acl-neon backend
- AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
- {1, 2, 2, 1},
- {1, 1, 1, 1},
- {2, 2, 2, 2},
- {circle::TensorType::TensorType_INT8, 2.0, -1},
- {"cpu"}},
- // int8_t data - large
- // TODO enable acl-cl, acl-neon backend
- AvgPool2DParam{
- uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
- {1, 18, 36, 2},
- {1, 1, 2, 2},
- {18, 18, 18, 18},
- {circle::TensorType::TensorType_INT8, 2.0, -1},
- {"cpu"}}));
-
-TEST_P(AveragePool2DVariation, Test)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends(param.backend);
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
-{
- // 3D Tensors are not supported
- CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
-{
- // 2D Tensors are not supported
- CircleGen cgen;
- int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
- param.param.stride_w, param.param.stride_h, param.param.filter_w,
- param.param.filter_h, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, -1, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, param.param.filter_w, 0,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
- param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
- param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc
new file mode 100644
index 000000000..8276ca4c1
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct AvgPool2DParam
+{
+ TestCaseData tcd;
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> output_shape;
+ struct filter_stride
+ {
+ int32_t filter_w;
+ int32_t filter_h;
+ int32_t stride_w;
+ int32_t stride_h;
+ } param = {1, 1, 1, 1};
+ struct data_type
+ {
+ circle::TensorType data_type;
+ float scale;
+ int64_t zero_point;
+ } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
+ std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"};
+};
+
+class AveragePool2DVariation : public GenModelTest,
+ public ::testing::WithParamInterface<AvgPool2DParam>
+{
+};
+
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, AveragePool2DVariation,
+ ::testing::Values(
+ // float data
+ AvgPool2DParam{
+ uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
+ // float data - large
+ AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18}},
+ // uint8_t data
+ AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3},
+ {"acl_cl", "acl_neon", "cpu"}},
+ // uint8_t data -large
+ AvgPool2DParam{
+ uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3},
+ {"acl_cl", "acl_neon", "cpu"}},
+ // int8_t data
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}},
+ // int8_t data - large
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{
+ uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}}));
+
+TEST_P(AveragePool2DVariation, Test)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backend);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
+{
+ // 3D Tensors are not supported
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
+{
+ // 2D Tensors are not supported
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
+ param.param.stride_w, param.param.stride_h, param.param.filter_w,
+ param.param.filter_h, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, -1, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
+ param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
+ param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc
index 3f4554302..3f4554302 100644
--- a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc
+++ b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Cast.cc b/tests/nnfw_api/src/one_op_tests/Cast.cc
deleted file mode 100644
index 928df2d24..000000000
--- a/tests/nnfw_api/src/one_op_tests/Cast.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, from_t});
- int out = cgen.addTensor({{1, 2, 2, 1}, to_t});
- cgen.addOperatorCast({{in}, {out}}, from_t, to_t);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen;
-}
-
-TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_UINT8);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<bool>({true, false, true, true})
- .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_BoolToInt32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_AfterEqual)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorEqual({{lhs, rhs}, {equal_out}});
- cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0)
-{
- CircleGen cgen;
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32,
- circle::TensorType::TensorType_INT32);
- cgen.setInputsAndOutputs({}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount2)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({in}, {});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({in}, {out1, out2});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Cast.test.cc b/tests/nnfw_api/src/one_op_tests/Cast.test.cc
new file mode 100644
index 000000000..b4cfa6f8f
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Cast.test.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, from_t});
+ int out = cgen.addTensor({{1, 2, 2, 1}, to_t});
+ cgen.addOperatorCast({{in}, {out}}, from_t, to_t);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_UINT8);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<bool>({true, false, true, true})
+ .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToInt32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Uint8ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_UINT8, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // clang-format off
+ _context->addTestCase(
+ TestCaseData{}.addInput<uint8_t>({0, 100, 200, 255})
+ .addOutput<float>({0., 100., 200., 255.}));
+ // clang-format on
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Int64ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_INT64, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int64_t>({-12345, 3, 100, 2147483648})
+ .addOutput<float>({-12345., 3., 100., 2147483648.}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_AfterEqual)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorEqual({{lhs, rhs}, {equal_out}});
+ cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0)
+{
+ CircleGen cgen;
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32,
+ circle::TensorType::TensorType_INT32);
+ cgen.setInputsAndOutputs({}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount2)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({in}, {});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({in}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Concat.cc b/tests/nnfw_api/src/one_op_tests/Concat.cc
deleted file mode 100644
index f4397ba66..000000000
--- a/tests/nnfw_api/src/one_op_tests/Concat.cc
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3,
- circle::ActivationFunctionType_NONE);
- cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}});
- cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{1, 3, 2, 4}, {5, 4, 7, 4}},
- {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-struct ConcatVariationParam
-{
- TestCaseData tcd;
- circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class ConcatVariation : public GenModelTest,
- public ::testing::WithParamInterface<ConcatVariationParam>
-{
-};
-
-// Input shape: {2, 3} / {2, 3}
-// Output shape: {4, 3}
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, ConcatVariation,
- ::testing::Values(
- // Float
- ConcatVariationParam{uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})},
- // Uint8
- ConcatVariationParam{uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_UINT8, 1.0f, -2},
- // Int8
- ConcatVariationParam{uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_INT8, 1.0f, -2},
- // Int16
- // TODO Enable when nnfw api support int16 type
- // ConcatVariationParam{
- // uniformTCD<int16_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- // circle::TensorType::TensorType_INT16, 1.0f, 0},
- // Int32
- ConcatVariationParam{uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_INT32},
- // Int64
- ConcatVariationParam{uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_INT64}));
-
-TEST_P(ConcatVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
-{
- CircleGen cgen;
- int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32});
- int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> axis_data{3};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
-
- int s_out1 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
- int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
- int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
- int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
-
- int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
-
- int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
-
- int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4);
-
- cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
-
- cgen.addOperatorAdd({{c_out1, in2}, {a_out1}},
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{c_out2, in2}, {a_out2}},
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{c_out3, in2}, {a_out3}},
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
-
- cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
-
- cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3,
- a_out1, a_out2, a_out3, final_out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {
- // inputs
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
- },
- {
- // outputs
- {1, 2, 3, 4, 5}, // s_out1
- {6, 7, 8, 9, 10}, // s_out2
- {11, 12, 13, 14, 15}, // s_out3
- {16, 17, 18, 19, 20}, // s_out4
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
- {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
- {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
- {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
- {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
- {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
- 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
- }));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(ConcatVariation, neg_InvalidAxis)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- int axis = 2;
-
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(ConcatVariation, neg_InvalidRank)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
- int axis = 0;
-
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(ConcatVariation, neg_InvalidDimension)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- int axis = 0;
-
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Concat.test.cc b/tests/nnfw_api/src/one_op_tests/Concat.test.cc
new file mode 100644
index 000000000..4f8360353
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Concat.test.cc
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3,
+ circle::ActivationFunctionType_NONE);
+ cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{1, 3, 2, 4}, {5, 4, 7, 4}},
+ {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+struct ConcatVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ConcatVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ConcatVariationParam>
+{
+};
+
+// Input shape: {2, 3} / {2, 3}
+// Output shape: {4, 3}
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ConcatVariation,
+ ::testing::Values(
+ // Float
+ ConcatVariationParam{uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})},
+ // Uint8
+ ConcatVariationParam{uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ ConcatVariationParam{uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2},
+ // Int16
+ // TODO Enable when nnfw api support int16 type
+ // ConcatVariationParam{
+ // uniformTCD<int16_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ // circle::TensorType::TensorType_INT16, 1.0f, 0},
+ // Int32
+ ConcatVariationParam{uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ ConcatVariationParam{uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT64}));
+
+TEST_P(ConcatVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
+{
+ CircleGen cgen;
+ int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+
+ int s_out1 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+
+ int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+
+ int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+
+ int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4);
+
+ cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.addOperatorAdd({{c_out1, in2}, {a_out1}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{c_out2, in2}, {a_out2}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{c_out3, in2}, {a_out3}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3,
+ a_out1, a_out2, a_out3, final_out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {
+ // inputs
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
+ },
+ {
+ // outputs
+ {1, 2, 3, 4, 5}, // s_out1
+ {6, 7, 8, 9, 10}, // s_out2
+ {11, 12, 13, 14, 15}, // s_out3
+ {16, 17, 18, 19, 20}, // s_out4
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
+ {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
+ 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
+ }));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidAxis)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 2;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidRank)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 0;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidDimension)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 0;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.cc
deleted file mode 100644
index 4f58e3d53..000000000
--- a/tests/nnfw_api/src/one_op_tests/Conv2D.cc
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_Conv2D)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
- {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
- {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 2, 2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
- {{-52, 7}}));
- _context->setBackends({"cpu", "ruy", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_I8)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- int weight =
- cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- std::vector<float> weight_scales = {0.5, 1, 0.5};
- std::vector<int64_t> weight_zeropoints = {0, 0, 0};
- int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
- weight_scales, weight_zeropoints);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 0, 0);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- int weight =
- cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- std::vector<float> weight_scales = {0.5, 1, 0.5};
- std::vector<int64_t> weight_zeropoints = {0, 0, 10};
- int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
- weight_scales, weight_zeropoints);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc
new file mode 100644
index 000000000..dccf2e5b8
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Conv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{-52, 7}}));
+ _context->setBackends({"cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_U8_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ std::vector<uint8_t> weight_data{2, 6, 2, 1, 2, 3, 2, 3, 4};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {.5, 1, 2};
+ std::vector<int64_t> weight_zeropoints = {2, 0, 1};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_UINT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<int32_t> bias_data{4, -8, -4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0);
+
+ // in and out
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 2., 1);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2);
+
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{5, 3, 7}}, {{5, 11, 24}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 10};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Cos.cc b/tests/nnfw_api/src/one_op_tests/Cos.test.cc
index 03944746a..03944746a 100644
--- a/tests/nnfw_api/src/one_op_tests/Cos.cc
+++ b/tests/nnfw_api/src/one_op_tests/Cos.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
deleted file mode 100644
index a4fe88493..000000000
--- a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct DepthToSpaceVariationParam
-{
- TestCaseData tcd;
- circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class DepthToSpaceVariation : public GenModelTest,
- public ::testing::WithParamInterface<DepthToSpaceVariationParam>
-{
-};
-
-// Input shape: {1, 1, 2, 4}
-// Block size: 2
-// Output shape: {1, 2, 4, 1}
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthToSpaceVariation,
- ::testing::Values(
- // Float
- DepthToSpaceVariationParam{
- uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})},
- // Int32
- DepthToSpaceVariationParam{
- uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_INT32},
- // Int64
- DepthToSpaceVariationParam{
- uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_INT64},
- // Uint8
- DepthToSpaceVariationParam{
- uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_UINT8, 1.0f, -2},
- // Int8
- DepthToSpaceVariationParam{
- uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_INT8, 1.0f, -2}));
-
-TEST_P(DepthToSpaceVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorDepthToSpace({{in}, {out}}, 2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(DepthToSpaceVariation, neg_Blocksize)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc
new file mode 100644
index 000000000..ad2272996
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct DepthToSpaceVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class DepthToSpaceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<DepthToSpaceVariationParam>
+{
+};
+
+// Input shape: {1, 1, 2, 4}
+// Block size: 2
+// Output shape: {1, 2, 4, 1}
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthToSpaceVariation,
+ ::testing::Values(
+ // Float
+ DepthToSpaceVariationParam{
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})},
+ // Int32
+ DepthToSpaceVariationParam{
+ uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ DepthToSpaceVariationParam{
+ uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT64},
+ // Uint8
+ DepthToSpaceVariationParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ DepthToSpaceVariationParam{
+ uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2}));
+
+TEST_P(DepthToSpaceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(DepthToSpaceVariation, neg_Blocksize)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
deleted file mode 100644
index a0bdbf9e6..000000000
--- a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{1, 2, 3, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}},
- {{71, -34, 99, -20, 91, -26, 127, -4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier)
-{
- CircleGen cgen;
- std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0.5f, -0.5f};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
- {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6)
-{
- CircleGen cgen;
- std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0.5f, -0.5f};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
- circle::ActivationFunctionType_RELU6);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
- {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3)
-{
- CircleGen cgen;
- std::vector<float> weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f,
- 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0.0f, 0.0f};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
- {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0, 0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE, 2, 2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- }},
- {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0, 0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 6, 6, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1,
- circle::ActivationFunctionType_NONE, 3, 3);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
- {{4, 0, 3, 0, 0, 0, 2, 0, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{1, 2, 3, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0, 0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE, 0, 0);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{1, 2, 3, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-// Generate a model for negative test cases
-CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h,
- int depth_multiplier,
- circle::ActivationFunctionType actfn)
-{
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>{0, 0});
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h,
- depth_multiplier, actfn, 0, 0);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-template <typename T> struct DepthwiseConv2DQuantTestParam
-{
- int stride = 1; // Used for both height and width
- int input_depth = 1;
- int depth_multiplier = 1;
- std::vector<T> ref_output;
-};
-
-template <typename T>
-class DepthwiseConv2DQuantTest
- : public GenModelTest,
- public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
-{
-};
-
-using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
-using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
-
-// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
-// kernels.
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DQuantTestU8,
- ::testing::Values(
- // Stride == 1
- DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DQuantTestParamU8{
- 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
- DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
- DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamU8{
- 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DQuantTestParamU8{1, 12, 1,
- std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
- // Stride == 2
- DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
- 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
-
-CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
-{
- assert(1 <= stride && stride <= 2);
- assert(1 <= input_depth && input_depth <= 16);
- assert(1 <= depth_multiplier && depth_multiplier <= 32);
-
- const int output_depth = input_depth * depth_multiplier;
- assert(1 <= output_depth && output_depth <= 32);
-
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
- int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
- stride, depth_multiplier, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_P(DepthwiseConv2DQuantTestU8, Test)
-{
- // Same input is used for all tests but output differs
- static const std::vector<uint8_t> input64{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
- 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
-
- auto &param = GetParam();
- _context = std::make_unique<GenModelTestContext>(
- genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
- std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
- _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
-using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
-
-// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
-// kernels.
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DQuantTestI8,
- ::testing::Values(
- // Stride == 1
- DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DQuantTestParamI8{
- 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
- DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
- DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamI8{
- 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DQuantTestParamI8{1, 12, 1,
- std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
- // Stride == 2
- DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
- 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
-
-CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
-{
- assert(1 <= stride && stride <= 2);
- assert(1 <= input_depth && input_depth <= 16);
- assert(1 <= depth_multiplier && depth_multiplier <= 32);
-
- const int output_depth = input_depth * depth_multiplier;
- assert(1 <= output_depth && output_depth <= 32);
-
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
- int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
- stride, depth_multiplier, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_P(DepthwiseConv2DQuantTestI8, Test)
-{
- // Same input is used for all tests but output differs
- static const std::vector<int8_t> input64{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
- 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
-
- auto &param = GetParam();
- _context = std::make_unique<GenModelTestContext>(
- genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
- std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
- _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
-{
- _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
- static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
- _context->expectFailModelLoad();
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
-
- SUCCEED();
-}
-
-// TODO add other invalid operation tests like above
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- std::vector<float> weight_scales = {0.5, 1};
- std::vector<int64_t> weight_zeropoints = {0, 10};
- int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
- weight_scales, weight_zeropoints);
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
new file mode 100644
index 000000000..f82d988d5
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
@@ -0,0 +1,502 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}},
+ {{71, -34, 99, -20, 91, -26, 127, -4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.5f, -0.5f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.5f, -0.5f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_RELU6);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.0f, 0.0f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ }},
+ {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 6, 6, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1,
+ circle::ActivationFunctionType_NONE, 3, 3);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+ {{4, 0, 3, 0, 0, 0, 2, 0, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_U8_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ // clang-format off
+ std::vector<uint8_t> weight_data{2, 1, 2,
+ 6, 2, 3,
+ 2, 3, 4,
+ 4, 4, 5};
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {.5, 1, 2};
+ std::vector<int64_t> weight_zeropoints = {2, 0, 1};
+ int weight = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<int32_t> bias_data{4, -8, -4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0);
+
+ // in and out
+ int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8}, 2., 1);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2);
+
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // clang-format off
+ _context->addTestCase(uniformTCD<uint8_t>({{5, 5, 5, // NHWC
+ 3, 3, 3,
+ 7, 7, 7,
+ 9, 9, 9}
+ },
+ {{9,
+ 27,
+ 56}
+ }));
+ // clang-format on
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+// Generate a model for negative test cases
+CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h,
+ int depth_multiplier,
+ circle::ActivationFunctionType actfn)
+{
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>{0, 0});
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h,
+ depth_multiplier, actfn, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+template <typename T> struct DepthwiseConv2DQuantTestParam
+{
+ int stride = 1; // Used for both height and width
+ int input_depth = 1;
+ int depth_multiplier = 1;
+ std::vector<T> ref_output;
+};
+
+template <typename T>
+class DepthwiseConv2DQuantTest
+ : public GenModelTest,
+ public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
+{
+};
+
+using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
+using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthwiseConv2DQuantTestU8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{
+ 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{
+ 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{1, 12, 1,
+ std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<uint8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthwiseConv2DQuantTestI8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{1, 12, 1,
+ std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<int8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
+{
+ _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
+ static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
+ _context->expectFailModelLoad();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+// TODO add other invalid operation tests like above
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1};
+ std::vector<int64_t> weight_zeropoints = {0, 10};
+ int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc
index 188638bbb..188638bbb 100644
--- a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc
+++ b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Elu.cc b/tests/nnfw_api/src/one_op_tests/Elu.test.cc
index a037070b2..a037070b2 100644
--- a/tests/nnfw_api/src/one_op_tests/Elu.cc
+++ b/tests/nnfw_api/src/one_op_tests/Elu.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Equal.cc b/tests/nnfw_api/src/one_op_tests/Equal.test.cc
index da890978e..da890978e 100644
--- a/tests/nnfw_api/src/one_op_tests/Equal.cc
+++ b/tests/nnfw_api/src/one_op_tests/Equal.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/ExpandDims.cc b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc
index 280cf7344..280cf7344 100644
--- a/tests/nnfw_api/src/one_op_tests/ExpandDims.cc
+++ b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Fill.cc b/tests/nnfw_api/src/one_op_tests/Fill.cc
deleted file mode 100644
index 4d5e4d8be..000000000
--- a/tests/nnfw_api/src/one_op_tests/Fill.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct FillVariationParam
-{
- TestCaseData tcd;
- const uint8_t *value_data = nullptr;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
-};
-
-class FillVariation : public GenModelTest, public ::testing::WithParamInterface<FillVariationParam>
-{
-};
-
-// value is constant
-TEST_P(FillVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- size_t value_size =
- (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t);
- uint32_t value_buf = cgen.addBuffer(param.value_data, value_size);
-
- int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int value = cgen.addTensor({{1}, param.data_type, value_buf});
- int out = cgen.addTensor({{2, 3}, param.data_type});
- cgen.addOperatorFill({{dims, value}, {out}});
- cgen.setInputsAndOutputs({dims}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-const int32_t test_int32 = 13;
-const int64_t test_int64 = 1052;
-const float test_float = 5.2;
-
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, FillVariation,
- ::testing::Values(
- // float value
- FillVariationParam{
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}),
- reinterpret_cast<const uint8_t *>(&test_float)},
- // int32 value
- FillVariationParam{
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}),
- reinterpret_cast<const uint8_t *>(&test_int32), circle::TensorType::TensorType_INT32},
- // uint8 value
- FillVariationParam{
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({1052, 1052, 1052, 1052, 1052,
- 1052}),
- reinterpret_cast<const uint8_t *>(&test_int64), circle::TensorType::TensorType_INT64}));
-
-TEST_F(GenModelTest, OneOp_Fill_Int64_Shape)
-{
- CircleGen cgen;
- std::vector<float> value_data{1.3};
- uint32_t value_buf = cgen.addBuffer(value_data);
-
- int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64});
- int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorFill({{dims, value}, {out}});
- cgen.setInputsAndOutputs({dims}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int64_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand)
-{
- CircleGen cgen;
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorFill({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand)
-{
- CircleGen cgen;
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64});
- cgen.addOperatorFill({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand)
-{
- CircleGen cgen;
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorFill({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Fill.test.cc b/tests/nnfw_api/src/one_op_tests/Fill.test.cc
new file mode 100644
index 000000000..0d34056b3
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Fill.test.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct FillVariationParam
+{
+ TestCaseData tcd;
+ const uint8_t *value_data = nullptr;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+};
+
+class FillVariation : public GenModelTest, public ::testing::WithParamInterface<FillVariationParam>
+{
+};
+
+// value is constant
+TEST_P(FillVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ size_t value_size =
+ (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t);
+ uint32_t value_buf = cgen.addBuffer(param.value_data, value_size);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int value = cgen.addTensor({{1}, param.data_type, value_buf});
+ int out = cgen.addTensor({{2, 3}, param.data_type});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+const int32_t test_int32 = 13;
+const int64_t test_int64 = 1052;
+const float test_float = 5.2;
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, FillVariation,
+ ::testing::Values(
+ // float value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}),
+ reinterpret_cast<const uint8_t *>(&test_float)},
+ // int32 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}),
+ reinterpret_cast<const uint8_t *>(&test_int32), circle::TensorType::TensorType_INT32},
+    // int64 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({1052, 1052, 1052, 1052, 1052,
+ 1052}),
+ reinterpret_cast<const uint8_t *>(&test_int64), circle::TensorType::TensorType_INT64}));
+
+TEST_F(GenModelTest, OneOp_Fill_Int64_Shape)
+{
+ CircleGen cgen;
+ std::vector<float> value_data{1.3};
+ uint32_t value_buf = cgen.addBuffer(value_data);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64});
+ int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int64_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Floor.cc b/tests/nnfw_api/src/one_op_tests/Floor.test.cc
index dcb402027..dcb402027 100644
--- a/tests/nnfw_api/src/one_op_tests/Floor.cc
+++ b/tests/nnfw_api/src/one_op_tests/Floor.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/FloorDiv.cc b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc
index edbca8504..edbca8504 100644
--- a/tests/nnfw_api/src/one_op_tests/FloorDiv.cc
+++ b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc
index 791787f9b..791787f9b 100644
--- a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc
+++ b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Greater.test.cc b/tests/nnfw_api/src/one_op_tests/Greater.test.cc
new file mode 100644
index 000000000..b63075c0e
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Greater.test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct GreaterVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class GreaterVariation : public GenModelTest,
+ public ::testing::WithParamInterface<GreaterVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, GreaterVariation,
+ ::testing::Values(
+ // Float type
+ GreaterVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, true, false, true})},
+ // Float type - broadcast
+ GreaterVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, false, false, true})},
+ // Int32 type
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(GreaterVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Greater_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Greater_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc
new file mode 100644
index 000000000..f824030e0
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct GreaterEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class GreaterEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<GreaterEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, GreaterEqualVariation,
+ ::testing::Values(
+ // Float type
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, true, false, true})},
+ // Float type - broadcast
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, true, false, true})},
+ // Int32 type
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(GreaterEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_GreaterEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_GreaterEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/If.cc b/tests/nnfw_api/src/one_op_tests/If.cc
deleted file mode 100644
index 4ec294223..000000000
--- a/tests/nnfw_api/src/one_op_tests/If.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_If)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x)
- // {
- // if (x < 0.0)
- // return -100.0;
- // else
- // return 100.0;
- // }
-
- CircleGen cgen;
-
- // constant buffers
- std::vector<float> comp_data{0.0};
- uint32_t comp_buf = cgen.addBuffer(comp_data);
- std::vector<float> then_data{-100};
- uint32_t then_buf = cgen.addBuffer(then_data);
- std::vector<float> else_data{100};
- uint32_t else_buf = cgen.addBuffer(else_data);
-
- // primary subgraph
- {
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf});
- int cond = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, comp}, {cond}});
-
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorIf({{cond}, {ret}}, 1, 2);
-
- cgen.setInputsAndOutputs({x}, {ret});
- }
-
- // then subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- // else subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{-1.0}}, {{-100.0}}));
- _context->addTestCase(uniformTCD<float>({{1.0}}, {{100.0}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-class IfWrongSubgraphIndex : public GenModelTest,
- public ::testing::WithParamInterface<std::pair<int, int>>
-{
-};
-
-TEST_P(IfWrongSubgraphIndex, neg_Test)
-{
- // These values must be less than 0 or greater than 2
- int then_subg = GetParam().first;
- int else_subg = GetParam().second;
-
- // When If operation's subgraph index is invalid
-
- CircleGen cgen;
-
- // constant buffers
- std::vector<float> then_data{-100};
- uint32_t then_buf = cgen.addBuffer(then_data);
- std::vector<float> else_data{100};
- uint32_t else_buf = cgen.addBuffer(else_data);
-
- // primary subgraph
- {
- int x = cgen.addTensor({{1}, circle::TensorType_BOOL});
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg);
-
- cgen.setInputsAndOutputs({x}, {ret});
- }
-
- // then subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- // else subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(GenModelTest, IfWrongSubgraphIndex,
- ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
- std::make_pair(1, 99), std::make_pair(1, -99),
- std::make_pair(-99, 99)));
diff --git a/tests/nnfw_api/src/one_op_tests/If.test.cc b/tests/nnfw_api/src/one_op_tests/If.test.cc
new file mode 100644
index 000000000..543d87980
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/If.test.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_If)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x)
+ // {
+ // if (x < 0.0)
+ // return -100.0;
+ // else
+ // return 100.0;
+ // }
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> comp_data{0.0};
+ uint32_t comp_buf = cgen.addBuffer(comp_data);
+ std::vector<float> then_data{-100};
+ uint32_t then_buf = cgen.addBuffer(then_data);
+ std::vector<float> else_data{100};
+ uint32_t else_buf = cgen.addBuffer(else_data);
+
+ // primary subgraph
+ {
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf});
+ int cond = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, comp}, {cond}});
+
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorIf({{cond}, {ret}}, 1, 2);
+
+ cgen.setInputsAndOutputs({x}, {ret});
+ }
+
+ // then subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ // else subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{-1.0}}, {{-100.0}}));
+ _context->addTestCase(uniformTCD<float>({{1.0}}, {{100.0}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+class IfWrongSubgraphIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::pair<int, int>>
+{
+};
+
+TEST_P(IfWrongSubgraphIndex, neg_Test)
+{
+ // These values must be less than 0 or greater than 2
+ int then_subg = GetParam().first;
+ int else_subg = GetParam().second;
+
+ // When If operation's subgraph index is invalid
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> then_data{-100};
+ uint32_t then_buf = cgen.addBuffer(then_data);
+ std::vector<float> else_data{100};
+ uint32_t else_buf = cgen.addBuffer(else_data);
+
+ // primary subgraph
+ {
+ int x = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg);
+
+ cgen.setInputsAndOutputs({x}, {ret});
+ }
+
+ // then subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ // else subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, IfWrongSubgraphIndex,
+ ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
+ std::make_pair(1, 99), std::make_pair(1, -99),
+ std::make_pair(-99, 99)));
diff --git a/tests/nnfw_api/src/one_op_tests/InstanceNorm.cc b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc
index 6569ced21..6569ced21 100644
--- a/tests/nnfw_api/src/one_op_tests/InstanceNorm.cc
+++ b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc
index f825fec5c..f825fec5c 100644
--- a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
+++ b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc
index cb3af4ee2..cb3af4ee2 100644
--- a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc
+++ b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Less.test.cc b/tests/nnfw_api/src/one_op_tests/Less.test.cc
new file mode 100644
index 000000000..6f76465ae
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Less.test.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct LessVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class LessVariation : public GenModelTest, public ::testing::WithParamInterface<LessVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, LessVariation,
+ ::testing::Values(
+ // Float type
+ LessVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, false, true, false})},
+ // Float type - broadcast
+ LessVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, false, true, false})},
+ // Int32 type
+ LessVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ LessVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ LessVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ LessVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(LessVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Less_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Less_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc
new file mode 100644
index 000000000..e0e6d6698
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct LessEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class LessEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<LessEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, LessEqualVariation,
+ ::testing::Values(
+ // Float type
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, false, true, false})},
+ // Float type - broadcast
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, true, true, false})},
+ // Int32 type
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(LessEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LessEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LessEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc
index 5834fa53a..5834fa53a 100644
--- a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc
+++ b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Mean.cc b/tests/nnfw_api/src/one_op_tests/Mean.test.cc
index 6293d3837..6293d3837 100644
--- a/tests/nnfw_api/src/one_op_tests/Mean.cc
+++ b/tests/nnfw_api/src/one_op_tests/Mean.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Mul.cc b/tests/nnfw_api/src/one_op_tests/Mul.test.cc
index 0c7944613..0c7944613 100644
--- a/tests/nnfw_api/src/one_op_tests/Mul.cc
+++ b/tests/nnfw_api/src/one_op_tests/Mul.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Neg.cc b/tests/nnfw_api/src/one_op_tests/Neg.test.cc
index 7bc0cc452..7bc0cc452 100644
--- a/tests/nnfw_api/src/one_op_tests/Neg.cc
+++ b/tests/nnfw_api/src/one_op_tests/Neg.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc
new file mode 100644
index 000000000..6a3fec150
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct NotEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class NotEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<NotEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, NotEqualVariation,
+ ::testing::Values(
+ // Float type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, true, true, true})},
+ // Float type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, false, true, true})},
+ // Int32 type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Bool type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<bool>({false, false, true, true})
+ .addInput<bool>({false, true, false, true})
+ .addOutput<bool>({false, true, true, false}),
+ circle::TensorType::TensorType_BOOL},
+ // Bool type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<bool>({false, false, true, true})
+ .addInput<bool>({false})
+ .addOutput<bool>({false, false, true, true}),
+ circle::TensorType::TensorType_BOOL}
+
+ ));
+
+TEST_P(NotEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_NotEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_NotEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/OneHot.cc b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc
index 78ad35b40..78ad35b40 100644
--- a/tests/nnfw_api/src/one_op_tests/OneHot.cc
+++ b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc
deleted file mode 100644
index c376c1c02..000000000
--- a/tests/nnfw_api/src/one_op_tests/Pad.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-// Input shape: {1, 2, 2, 1}
-// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
-// Output shape: {1, 4, 4, 1}
-struct PadParam
-{
- TestCaseData tcd;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
-{
-};
-
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, PadVariation,
- ::testing::Values(
- // float value
- PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
- // uint8 value
- PadParam{
- uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
- circle::TensorType::TensorType_UINT8, 1.0, 8},
- // int8 value
- PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
- {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
- circle::TensorType::TensorType_INT8, 1.0, -5}));
-
-TEST_P(PadVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_InvalidPadRank)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_InvalidPadDim0)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_InvalidPadDim1)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_Type)
-{
- auto &param = GetParam();
-
- const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
- ? circle::TensorType::TensorType_INT8
- : circle::TensorType::TensorType_UINT8);
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.test.cc b/tests/nnfw_api/src/one_op_tests/Pad.test.cc
new file mode 100644
index 000000000..582bd84bc
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Pad.test.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// Input shape: {1, 2, 2, 1}
+// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
+// Output shape: {1, 4, 4, 1}
+struct PadParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
+{
+};
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, PadVariation,
+ ::testing::Values(
+ // float value
+ PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+ // uint8 value
+ PadParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 8},
+ // int8 value
+ PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+ {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+ circle::TensorType::TensorType_INT8, 1.0, -5}));
+
+TEST_P(PadVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadRank)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadDim0)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadDim1)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_Type)
+{
+ auto &param = GetParam();
+
+ const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
+ ? circle::TensorType::TensorType_INT8
+ : circle::TensorType::TensorType_UINT8);
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc
index 3db2187b2..3db2187b2 100644
--- a/tests/nnfw_api/src/one_op_tests/PadV2.cc
+++ b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Quantize.cc b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc
index 5ab4d6297..5ab4d6297 100644
--- a/tests/nnfw_api/src/one_op_tests/Quantize.cc
+++ b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.test.cc
index 60ec1931a..60ec1931a 100644
--- a/tests/nnfw_api/src/one_op_tests/Rank.cc
+++ b/tests/nnfw_api/src/one_op_tests/Rank.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.cc b/tests/nnfw_api/src/one_op_tests/Reduce.cc
deleted file mode 100644
index bdcc5c225..000000000
--- a/tests/nnfw_api/src/one_op_tests/Reduce.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims)
-{
- CircleGen cgen;
- uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{0, 1, 2, 3});
- int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
- int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
- int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_F(GenModelTest, OneOp_ReduceMax)
-{
- auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false);
- _context = std::make_unique<GenModelTestContext>(std::move(model));
- _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{6}}));
- _context->addTestCase(uniformTCD<float>({{100, 98, 55, 200, 3, 40}}, {{200}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-class ReduceMaxBadIndex : public GenModelTest,
- public ::testing::WithParamInterface<std::vector<int>>
-{
-};
-
-TEST_P(ReduceMaxBadIndex, neg_Test)
-{
- CircleGen cgen;
- // Axis cannot be equal or bigger than input's rank - 4
- uint32_t axis_buf = cgen.addBuffer(GetParam());
- int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
- int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
- int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(GenModelTest, ReduceMaxBadIndex,
- ::testing::Values(std::vector<int32_t>{0, 1, 2, 4},
- std::vector<int32_t>{0, -5, 2, 3},
- std::vector<int32_t>{-88, 1, 2, 3},
- std::vector<int32_t>{0, 1, 88, 3}));
diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.test.cc b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc
new file mode 100644
index 000000000..13d180aed
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims)
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{0, 1, 2, 3});
+ int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_F(GenModelTest, OneOp_ReduceMax)
+{
+ auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false);
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{6}}));
+ _context->addTestCase(uniformTCD<float>({{100, 98, 55, 200, 3, 40}}, {{200}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+class ReduceMaxBadIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::vector<int>>
+{
+};
+
+TEST_P(ReduceMaxBadIndex, neg_Test)
+{
+ CircleGen cgen;
+ // Axis cannot be equal or bigger than input's rank - 4
+ uint32_t axis_buf = cgen.addBuffer(GetParam());
+ int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, ReduceMaxBadIndex,
+ ::testing::Values(std::vector<int32_t>{0, 1, 2, 4},
+ std::vector<int32_t>{0, -5, 2, 3},
+ std::vector<int32_t>{-88, 1, 2, 3},
+ std::vector<int32_t>{0, 1, 88, 3}));
diff --git a/tests/nnfw_api/src/one_op_tests/Relu.cc b/tests/nnfw_api/src/one_op_tests/Relu.test.cc
index 28c511270..28c511270 100644
--- a/tests/nnfw_api/src/one_op_tests/Relu.cc
+++ b/tests/nnfw_api/src/one_op_tests/Relu.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Relu6.cc b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc
index 88b8eba83..88b8eba83 100644
--- a/tests/nnfw_api/src/one_op_tests/Relu6.cc
+++ b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc
deleted file mode 100644
index 5db08f168..000000000
--- a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-struct ResizeBilinearParam
-{
- TestCaseData tcd;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class ResizeBilinearVariation : public GenModelTest,
- public ::testing::WithParamInterface<ResizeBilinearParam>
-{
-};
-
-TEST_P(ResizeBilinearVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- std::vector<int32_t> size_data{3, 3};
- uint32_t size_buf = cgen.addBuffer(size_data);
- int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
- cgen.addOperatorResizeBilinear({{in, size}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, ResizeBilinearVariation,
- ::testing::Values(
- // float value
- ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
- 1.666666667, 2, 2, 2}})},
- // uint8 value
- ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
- circle::TensorType::TensorType_UINT8, 1.0, 0},
- // int8 value
- ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
- circle::TensorType::TensorType_INT8, 1.0, 0}));
-
-TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
-{
- CircleGen cgen;
- int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorResizeBilinear({{in, size}, {out}});
- cgen.setInputsAndOutputs({in, size}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- // FIXME enable a test case the below is not a valid test case
- //_context->addTestCase(TestCaseData{}.addInput<int32_t>({3, 3}).addInput<float>({1, 1, 2,
- // 2}).addOutput<float>({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal)
-{
- CircleGen cgen;
- std::vector<int32_t> size_data{-3, 3};
- uint32_t size_buf = cgen.addBuffer(size_data);
- int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorResizeBilinear({{in, size}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc
new file mode 100644
index 000000000..fe313d4e7
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ResizeBilinearParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ResizeBilinearVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ResizeBilinearParam>
+{
+};
+
+TEST_P(ResizeBilinearVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ std::vector<int32_t> size_data{3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ResizeBilinearVariation,
+ ::testing::Values(
+ // float value
+ ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
+ 1.666666667, 2, 2, 2}})},
+ // uint8 value
+ ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 0},
+ // int8 value
+ ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
+{
+ CircleGen cgen;
+ int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in, size}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // FIXME enable a test case the below is not a valid test case
+ //_context->addTestCase(TestCaseData{}.addInput<int32_t>({3, 3}).addInput<float>({1, 1, 2,
+ // 2}).addOutput<float>({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal)
+{
+ CircleGen cgen;
+ std::vector<int32_t> size_data{-3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc
index 1dd65844b..1dd65844b 100644
--- a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
+++ b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Reverse.cc b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc
index 4168b2123..4168b2123 100644
--- a/tests/nnfw_api/src/one_op_tests/Reverse.cc
+++ b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Select.cc b/tests/nnfw_api/src/one_op_tests/Select.test.cc
index e1d991877..e1d991877 100644
--- a/tests/nnfw_api/src/one_op_tests/Select.cc
+++ b/tests/nnfw_api/src/one_op_tests/Select.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Shape.cc b/tests/nnfw_api/src/one_op_tests/Shape.test.cc
index 2a73db99a..2a73db99a 100644
--- a/tests/nnfw_api/src/one_op_tests/Shape.cc
+++ b/tests/nnfw_api/src/one_op_tests/Shape.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Slice.cc b/tests/nnfw_api/src/one_op_tests/Slice.cc
deleted file mode 100644
index 002fb0132..000000000
--- a/tests/nnfw_api/src/one_op_tests/Slice.cc
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct SliceVariationParam
-{
- std::vector<int32_t> input_shape;
- std::vector<int32_t> begins;
- std::vector<int32_t> sizes;
- TestCaseData tcd;
-
- circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
- circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
-};
-
-class SliceVariation : public GenModelTest,
- public ::testing::WithParamInterface<SliceVariationParam>
-{
-};
-
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, SliceVariation,
- ::testing::Values(
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
- circle::TensorType::TensorType_UINT8,
- 1,
- 0},
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
- circle::TensorType::TensorType_FLOAT32,
- 0,
- 0,
- circle::TensorType::TensorType_INT64}));
-
-TEST_P(SliceVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
- if (param.begins_type == circle::TensorType::TensorType_INT32)
- {
- uint32_t begins_buf = cgen.addBuffer(param.begins);
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(param.sizes);
- int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- else if (param.begins_type == circle::TensorType::TensorType_INT64)
- {
- std::vector<int64_t> begins_64(param.begins.size());
- std::vector<int64_t> sizes_64(param.sizes.size());
- for (int i = 0; i < param.begins.size(); i++)
- {
- begins_64[i] = param.begins[i];
- sizes_64[i] = param.sizes[i];
- }
-
- uint32_t begins_buf = cgen.addBuffer(begins_64);
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(sizes_64);
- int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
-
- // acl don't support int64 yet
- if (param.begins_type == circle::TensorType::TensorType_INT64)
- {
- _context->setBackends({"cpu"});
- }
- else
- {
- _context->setBackends({"cpu", "acl_cl", "acl_neon"});
- }
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Slice_Type)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- std::vector<float> begins_data = {0, 0, 1, 0};
- uint32_t begins_buf = cgen.addBuffer(begins_data);
- int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
- std::vector<float> sizes_data = {1, 2, 1, 1};
- uint32_t sizes_buf = cgen.addBuffer(sizes_data);
- int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
- int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(SliceVariation, neg_DiffType)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
- if (param.begins_type == circle::TensorType::TensorType_INT32)
- {
- uint32_t begins_buf = cgen.addBuffer(param.begins);
- std::vector<int64_t> sizes_64(param.sizes.size());
- for (int i = 0; i < param.begins.size(); i++)
- {
- sizes_64[i] = param.sizes[i];
- }
-
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(sizes_64);
- int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- else if (param.begins_type == circle::TensorType::TensorType_INT64)
- {
- std::vector<int64_t> begins_64(param.begins.size());
- for (int i = 0; i < param.begins.size(); i++)
- {
- begins_64[i] = param.begins[i];
- }
-
- uint32_t begins_buf = cgen.addBuffer(begins_64);
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(param.sizes);
- int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Slice.test.cc b/tests/nnfw_api/src/one_op_tests/Slice.test.cc
new file mode 100644
index 000000000..8cd9d7037
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Slice.test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct SliceVariationParam
+{
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> begins;
+ std::vector<int32_t> sizes;
+ TestCaseData tcd;
+
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+ circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
+};
+
+class SliceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<SliceVariationParam>
+{
+};
+
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, SliceVariation,
+ ::testing::Values(
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_UINT8,
+ 1,
+ 0},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_FLOAT32,
+ 0,
+ 0,
+ circle::TensorType::TensorType_INT64}));
+
+TEST_P(SliceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ sizes_64[i] = param.sizes[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+
+ // acl don't support int64 yet
+ if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ _context->setBackends({"cpu"});
+ }
+ else
+ {
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ }
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Slice_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<float> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
+ std::vector<float> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(SliceVariation, neg_DiffType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ sizes_64[i] = param.sizes[i];
+ }
+
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.cc b/tests/nnfw_api/src/one_op_tests/Softmax.cc
deleted file mode 100644
index aba4e89a0..000000000
--- a/tests/nnfw_api/src/one_op_tests/Softmax.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-// beta = 0.1
-// input/output shape: {1, 2, 1, 4}
-struct SoftmaxParam
-{
- TestCaseData tcd;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- float input_scale = 0.0f;
- int64_t input_zero_point = 0;
-};
-
-class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
-{
-};
-
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, SoftmaxVariation,
- ::testing::Values(
- // float value
- SoftmaxParam{
- uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
- {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
- // uint8 value
- SoftmaxParam{
- uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
- circle::TensorType::TensorType_UINT8, 1.0, 10},
- // int8 value
- SoftmaxParam{
- uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
- circle::TensorType::TensorType_INT8, 1.0, 0}));
-
-TEST_P(SoftmaxVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- // NNAPI spec and tflite test use fixed output scale and zero-point
- float out_scale = 0.0;
- int64_t out_zero_point = 0;
- if (param.data_type == circle::TensorType::TensorType_UINT8)
- {
- out_scale = 1.0f / 256;
- }
- else if (param.data_type == circle::TensorType::TensorType_INT8)
- {
- out_scale = 1.0f / 256;
- out_zero_point = -128;
- }
-
- int input =
- cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
- int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
- cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
- cgen.setInputsAndOutputs({input}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"cpu", "acl_neon", "acl_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Softmax_Invaild_Beta)
-{
- CircleGen cgen;
- int input = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
- cgen.setInputsAndOutputs({input}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}}));
- _context->setBackends({"gpu_cl"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Softmax)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{-1., 0., 1., 1.}},
- {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}}));
- _context->setBackends({"acl_cl", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_P(SoftmaxVariation, neg_Type)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input =
- cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
- cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
- cgen.setInputsAndOutputs({input}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.test.cc b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc
new file mode 100644
index 000000000..1782baf64
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// beta = 0.1
+// input/output shape: {1, 2, 1, 4}
+struct SoftmaxParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float input_scale = 0.0f;
+ int64_t input_zero_point = 0;
+};
+
+class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
+{
+};
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, SoftmaxVariation,
+ ::testing::Values(
+ // float value
+ SoftmaxParam{
+ uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+ // uint8 value
+ SoftmaxParam{
+ uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 10},
+ // int8 value
+ SoftmaxParam{
+ uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_P(SoftmaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ // NNAPI spec and tflite test use fixed output scale and zero-point
+ float out_scale = 0.0;
+ int64_t out_zero_point = 0;
+ if (param.data_type == circle::TensorType::TensorType_UINT8)
+ {
+ out_scale = 1.0f / 256;
+ }
+ else if (param.data_type == circle::TensorType::TensorType_INT8)
+ {
+ out_scale = 1.0f / 256;
+ out_zero_point = -128;
+ }
+
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu", "acl_neon", "acl_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Softmax_Invaild_Beta)
+{
+ CircleGen cgen;
+ int input = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}}));
+ _context->setBackends({"gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Softmax)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{-1., 0., 1., 1.}},
+ {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}}));
+ _context->setBackends({"acl_cl", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_P(SoftmaxVariation, neg_Type)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Split.cc b/tests/nnfw_api/src/one_op_tests/Split.test.cc
index 32be9a767..32be9a767 100644
--- a/tests/nnfw_api/src/one_op_tests/Split.cc
+++ b/tests/nnfw_api/src/one_op_tests/Split.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Sqrt.cc b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc
index 01f313371..01f313371 100644
--- a/tests/nnfw_api/src/one_op_tests/Sqrt.cc
+++ b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Square.cc b/tests/nnfw_api/src/one_op_tests/Square.test.cc
index 2ec9bad0d..2ec9bad0d 100644
--- a/tests/nnfw_api/src/one_op_tests/Square.cc
+++ b/tests/nnfw_api/src/one_op_tests/Square.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/StridedSlice.cc b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc
index fb29018d4..fb29018d4 100644
--- a/tests/nnfw_api/src/one_op_tests/StridedSlice.cc
+++ b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Sub.cc b/tests/nnfw_api/src/one_op_tests/Sub.test.cc
index bb4fecd2d..bb4fecd2d 100644
--- a/tests/nnfw_api/src/one_op_tests/Sub.cc
+++ b/tests/nnfw_api/src/one_op_tests/Sub.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Tile.cc b/tests/nnfw_api/src/one_op_tests/Tile.test.cc
index 3f193d5e6..3f193d5e6 100644
--- a/tests/nnfw_api/src/one_op_tests/Tile.cc
+++ b/tests/nnfw_api/src/one_op_tests/Tile.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Transpose.cc b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc
index 5a92c7303..5a92c7303 100644
--- a/tests/nnfw_api/src/one_op_tests/Transpose.cc
+++ b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/While.cc b/tests/nnfw_api/src/one_op_tests/While.cc
deleted file mode 100644
index ee0a9df46..000000000
--- a/tests/nnfw_api/src/one_op_tests/While.cc
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-#include "WhileTestModel.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_While)
-{
- WhileModelLoop10 model;
- _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
- _context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
- _context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
- _context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
- _context->addTestCase(uniformTCD<float>({{100}}, {{100}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_While_github_4783)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x, data)
- // {
- // // `data` does not do anything but passed to while's cond and body subgraphs
- // // to measure copy overhead between subgraphs
- // while (x < 100.0)
- // {
- // x = x + 1.0;
- // }
- // return (x, data)
- // }
-
- const int kElems = 4;
- const std::vector<int32_t> shape{kElems};
-
- CircleGen cgen;
- uint32_t incr_buf = cgen.addBuffer(std::vector<float>{1});
- uint32_t incr_data_buf = cgen.addBuffer(std::vector<float>(kElems, 1));
- uint32_t end_buf = cgen.addBuffer(std::vector<float>{100});
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x, d}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf});
- int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- std::vector<float> tc_data_in(kElems, 9);
- std::vector<float> tc_data_out(kElems, 109);
- _context->addTestCase(uniformTCD<float>({{0}, tc_data_in}, {{100}, tc_data_out}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_While_TwoInputs)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x, end)
- // {
- // while (x < end)
- // {
- // x = x + 10.0
- // }
- // return x
- // }
-
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x, end}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{0}, {20}}, {{20}}));
- _context->addTestCase(uniformTCD<float>({{5}, {30}}, {{35}}));
- _context->addTestCase(uniformTCD<float>({{20}, {10}}, {{20}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-class WhileWrongSubgraphIndex : public GenModelTest,
- public ::testing::WithParamInterface<std::pair<int, int>>
-{
-};
-
-TEST_P(WhileWrongSubgraphIndex, neg_Test)
-{
- // These values must be less than 0 or greater than 2
- int cond_subg = GetParam().first;
- int body_subg = GetParam().second;
-
- // When While operation's subgraph index is invalid
-
- CircleGen cgen;
-
- // constant buffers
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg);
- cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x, end}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(GenModelTest, WhileWrongSubgraphIndex,
- ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
- std::make_pair(1, 99), std::make_pair(1, -99),
- std::make_pair(-99, 99)));
-
-// In this test, output of WHILE and body subgraph have different data types
-TEST_F(GenModelTest, neg_while_wrong_dtype)
-{
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
- std::vector<float> end_data{100};
- uint32_t end_buf = cgen.addBuffer(end_data);
-
- // primary subgraph
- {
- int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
-
- cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2);
- cgen.setInputsAndOutputs({model_in}, {model_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int cast_out = cgen.addTensor({{1}, circle::TensorType_INT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32,
- circle::TensorType_INT32);
- cgen.setInputsAndOutputs({x_in}, {cast_out});
- // output of this subgraph is INT32 but output of WHILE is FLOAT32
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- auto tc = uniformTCD<float>({{0}}, {{100}});
- tc.expectFailRun();
- _context->addTestCase(tc);
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/While.test.cc b/tests/nnfw_api/src/one_op_tests/While.test.cc
new file mode 100644
index 000000000..5c4da552c
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/While.test.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+#include "WhileTestModel.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_While)
+{
+ WhileModelLoop10 model;
+ _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
+ _context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
+ _context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
+ _context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
+ _context->addTestCase(uniformTCD<float>({{100}}, {{100}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_While_github_4783)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x, data)
+ // {
+ // // `data` does not do anything but passed to while's cond and body subgraphs
+ // // to measure copy overhead between subgraphs
+ // while (x < 100.0)
+ // {
+ // x = x + 1.0;
+ // }
+ // return (x, data)
+ // }
+
+ const int kElems = 4;
+ const std::vector<int32_t> shape{kElems};
+
+ CircleGen cgen;
+ uint32_t incr_buf = cgen.addBuffer(std::vector<float>{1});
+ uint32_t incr_data_buf = cgen.addBuffer(std::vector<float>(kElems, 1));
+ uint32_t end_buf = cgen.addBuffer(std::vector<float>{100});
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, d}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf});
+ int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ std::vector<float> tc_data_in(kElems, 9);
+ std::vector<float> tc_data_out(kElems, 109);
+ _context->addTestCase(uniformTCD<float>({{0}, tc_data_in}, {{100}, tc_data_out}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_While_TwoInputs)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x, end)
+ // {
+ // while (x < end)
+ // {
+ // x = x + 10.0
+ // }
+ // return x
+ // }
+
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, end}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0}, {20}}, {{20}}));
+ _context->addTestCase(uniformTCD<float>({{5}, {30}}, {{35}}));
+ _context->addTestCase(uniformTCD<float>({{20}, {10}}, {{20}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+class WhileWrongSubgraphIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::pair<int, int>>
+{
+};
+
+TEST_P(WhileWrongSubgraphIndex, neg_Test)
+{
+ // These values must be less than 0 or greater than 2
+ int cond_subg = GetParam().first;
+ int body_subg = GetParam().second;
+
+ // When While operation's subgraph index is invalid
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg);
+ cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, end}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, WhileWrongSubgraphIndex,
+ ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
+ std::make_pair(1, 99), std::make_pair(1, -99),
+ std::make_pair(-99, 99)));
+
+// In this test, output of WHILE and body subgraph have different data types
+TEST_F(GenModelTest, neg_while_wrong_dtype)
+{
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+
+ cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2);
+ cgen.setInputsAndOutputs({model_in}, {model_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int cast_out = cgen.addTensor({{1}, circle::TensorType_INT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32,
+ circle::TensorType_INT32);
+ cgen.setInputsAndOutputs({x_in}, {cast_out});
+ // output of this subgraph is INT32 but output of WHILE is FLOAT32
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ // It is correct to call `_context->expectFailModelLoad();`, but OperationValidator does not deal
+ // with subgraphs. So it is verified by `_context->expectFailCompile(); as a workaround`
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/scripts/command/nnpkg-test b/tests/scripts/command/nnpkg-test
index a1176d153..ba712175e 100644
--- a/tests/scripts/command/nnpkg-test
+++ b/tests/scripts/command/nnpkg-test
@@ -12,6 +12,7 @@ outdir="."
nnpkg_run=${nnpkg_run:-"nnpackage_run"}
difftool=${difftool:-"h5diff"}
delete_dumped_on_failure=0
+verbose_diff=0
usage() {
echo "Usage: $0 $progname [options] nnpackage_test"
@@ -27,6 +28,7 @@ usage() {
echo " -o set output directory (default=$outdir)"
echo " -d delete dumped file on failure."
echo " (dumped file are always deleted on success) (default=$delete_dumped_on_failure)"
+ echo " -v verbose result diff (default=$verbose_diff)"
echo ""
echo "Environment variables:"
echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)"
@@ -43,12 +45,13 @@ if [ $# -eq 0 ]; then
exit 1
fi
-while getopts "hdi:o:" OPTION; do
+while getopts "hdi:o:v" OPTION; do
case "${OPTION}" in
h) usage;;
d) delete_dumped_on_failure=1;;
i) indir=$OPTARG;;
o) outdir=$OPTARG;;
+ v) verbose_diff=1;;
?) exit 1;;
esac
done
@@ -110,8 +113,8 @@ echo -n "[Compare] $nnpkg "
test_fail()
{
echo -e "\tFail"
- [ $delete_dumped_on_failure ] && rm "$dumped"
- cat "$dumped.log"
+ [ $delete_dumped_on_failure -eq 1 ] && rm "$dumped"
+ [ $verbose_diff -eq 1 ] && cat "$dumped.log"
rm "$dumped.log"
exit 3
}
@@ -119,7 +122,7 @@ test_fail()
test_pass()
{
echo -e "\tPass"
- cat "$dumped.log"
+ [ $verbose_diff -eq 1 ] && cat "$dumped.log"
rm "$dumped" "$dumped.log"
}
diff --git a/tests/scripts/command/prepare-model b/tests/scripts/command/prepare-model
index 5b3340813..7c6525491 100644
--- a/tests/scripts/command/prepare-model
+++ b/tests/scripts/command/prepare-model
@@ -24,8 +24,8 @@ function Usage()
echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
echo ""
echo "Options:"
- echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --model=(all|nnpackage|tflite) Download test model (deprecated option: always all)"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " -h, --help Display this help message and exit"
}
for i in "$@"
@@ -38,9 +38,6 @@ do
--ignoremd5)
MD5_CHECK="off"
;;
- --model=*)
- # deprecated
- ;;
*)
echo "Unknown option: $i"
exit 1
@@ -49,9 +46,10 @@ do
shift
done
-# Default download server url
+# Check MODELFILE_SERVER
if [[ -z "$MODELFILE_SERVER" ]]; then
- export MODELFILE_SERVER="http://npu.mooo.com/archive/tflite_test_model/"
+ echo "Fail to download models: Please set MODELFILE_SERVER to download model"
+ exit 1
fi
echo "Download from $MODELFILE_SERVER"
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
index 71d8b5977..7a58053f3 100644
--- a/tests/tools/nnpackage_run/src/nnpackage_run.cc
+++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc
@@ -29,6 +29,7 @@
#include "ruy/profiler/profiler.h"
#endif
+#include <boost/program_options.hpp>
#include <cassert>
#include <chrono>
#include <cstdlib>
@@ -313,6 +314,11 @@ int main(const int argc, char **argv)
return 0;
}
+ catch (boost::program_options::error &e)
+ {
+ std::cerr << "E: " << e.what() << std::endl;
+ exit(-1);
+ }
catch (std::runtime_error &e)
{
std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
diff --git a/tests/tools/nnpackage_run/src/rawformatter.cc b/tests/tools/nnpackage_run/src/rawformatter.cc
index f90018e56..e4b977485 100644
--- a/tests/tools/nnpackage_run/src/rawformatter.cc
+++ b/tests/tools/nnpackage_run/src/rawformatter.cc
@@ -29,14 +29,13 @@ void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocatio
uint32_t num_inputs;
NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
- // TODO: Support multiple inputs
- // Option 1. Get comman-separated input file list like --load:raw in.0,in.1,in.2
- // Option 2. Get prefix --load:raw out
- // Internally access out.0, out.1, out.2, ... out.{N} where N is determined by api.
- if (num_inputs != 1)
- {
- throw std::runtime_error("Only 1 input is supported for raw input");
- }
+ // Support multiple inputs
+ // Option 1: Get comman-separated input file list like --load:raw a,b,c
+ // Option 2: Get prefix --load:raw in
+ // Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info
+ // query api.
+ //
+ // Currently Option 2 is implemented.
try
{
for (uint32_t i = 0; i < num_inputs; ++i)
@@ -48,11 +47,12 @@ void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocatio
auto bufsz = bufsize_for(&ti);
inputs[i].alloc(bufsz);
- std::ifstream file(filename, std::ios::ate | std::ios::binary);
+ std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
auto filesz = file.tellg();
if (bufsz != filesz)
{
- throw std::runtime_error("Input Size does not match: " + std::to_string(bufsz) +
+ throw std::runtime_error("Input " + std::to_string(i) +
+ " size does not match: " + std::to_string(bufsz) +
" expected, but " + std::to_string(filesz) + " provided.");
}
file.seekg(0, std::ios::beg);
@@ -74,12 +74,6 @@ void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocati
{
uint32_t num_outputs;
NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
- // TODO: Support multiple outputs
- // Available options are same.
- if (num_outputs != 1)
- {
- throw std::runtime_error("Only 1 output is supported for raw input");
- }
try
{
for (uint32_t i = 0; i < num_outputs; i++)
diff --git a/tests/tools/tflite_vanilla_run/CMakeLists.txt b/tests/tools/tflite_vanilla_run/CMakeLists.txt
index a673058a4..115b2f386 100644
--- a/tests/tools/tflite_vanilla_run/CMakeLists.txt
+++ b/tests/tools/tflite_vanilla_run/CMakeLists.txt
@@ -2,12 +2,13 @@ if(NOT BUILD_TFLITE_VANILLA_RUN)
return()
endif()
-if(NOT BUILD_TENSORFLOW_LITE_2_3_0)
- set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
+if(NOT BUILD_TENSORFLOW_LITE_2_8_0)
+ set(BUILD_TENSORFLOW_LITE_2_8_0 ON)
+ set(BUILD_TENSORFLOWRUY ON)
endif()
-nnfw_find_package(TensorFlowLite EXACT 2.3.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED)
+nnfw_find_package(TensorFlowLite EXACT 2.8.0 REQUIRED)
+nnfw_find_package(Boost REQUIRED program_options)
list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
list(APPEND TFLITE_RUN_SRCS "src/args.cc")
@@ -16,7 +17,7 @@ add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
target_include_directories(tflite_vanilla_run PRIVATE src)
target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(tflite_vanilla_run tensorflow-lite-2.3.0 ${LIB_PTHREAD} dl)
+target_link_libraries(tflite_vanilla_run tensorflow-lite-2.8.0 ${LIB_PTHREAD} dl)
target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
diff --git a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
index 77b5e7a37..6194b4505 100644
--- a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
+++ b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
@@ -16,6 +16,7 @@
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "args.h"
#include "tensor_view.h"
@@ -128,7 +129,11 @@ int main(const int argc, char **argv)
try
{
phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
- interpreter->UseNNAPI(use_nnapi);
+ if (use_nnapi)
+ {
+ // TFLite NNAPI is not worked yet
+ interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate());
+ }
interpreter->AllocateTensors();
});
}
diff --git a/tools/cross/arm/sources.list.jammy b/tools/cross/arm/sources.list.jammy
new file mode 100644
index 000000000..6bb045302
--- /dev/null
+++ b/tools/cross/arm/sources.list.jammy
@@ -0,0 +1,11 @@
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
diff --git a/tools/cross/arm/sources.list.xenial b/tools/cross/arm/sources.list.xenial
deleted file mode 100644
index 56fbb36a5..000000000
--- a/tools/cross/arm/sources.list.xenial
+++ /dev/null
@@ -1,11 +0,0 @@
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
diff --git a/tools/cross/install_rootfs.sh b/tools/cross/install_rootfs.sh
index f03d52371..16f3a5cad 100755
--- a/tools/cross/install_rootfs.sh
+++ b/tools/cross/install_rootfs.sh
@@ -3,7 +3,7 @@ usage()
{
echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
echo "BuildArch can be: arm(default), aarch64 and armel"
- echo "LinuxCodeName - optional, Code name for Linux, can be: xenial, bionic(default), focal"
+ echo "LinuxCodeName - optional, Code name for Linux, can be: bionic(default), focal, jammy"
echo " If BuildArch is armel, this can be tizen(default)"
echo "--setproxy=IP - optional, IP is the proxy server IP address or url with portnumber"
echo " default no proxy. Example: --setproxy=127.1.2.3:8080"
@@ -22,12 +22,15 @@ __SkipUnmount=0
__IsProxySet=0
__Apt=""
# base development support
+# install cmake to find cmake package configuration for target file system
__UbuntuPackages="build-essential"
+__UbuntuPackages+=" cmake"
# other development supports
__UbuntuPackages+=" ocl-icd-opencl-dev"
__UbuntuPackages+=" libhdf5-dev"
__UbuntuPackages+=" libboost-all-dev"
+__UbuntuPackages+=" libglib2.0-dev"
# symlinks fixer
__UbuntuPackages+=" symlinks"
@@ -67,15 +70,15 @@ for i in "$@" ; do
__UbuntuRepo=
__LinuxCodeName=
;;
- xenial)
- __LinuxCodeName=xenial
- ;;
bionic)
__LinuxCodeName=bionic
;;
focal)
__LinuxCodeName=focal
;;
+ jammy)
+ __LinuxCodeName=jammy
+ ;;
--setproxy*)
proxyip="${i#*=}"
__Apt="Acquire::http::proxy \"http://$proxyip/\";\n"
diff --git a/tools/nnpackage_tool/gen_golden/gen_golden.py b/tools/nnpackage_tool/gen_golden/gen_golden.py
index 79c86e6d7..d555419a6 100755
--- a/tools/nnpackage_tool/gen_golden/gen_golden.py
+++ b/tools/nnpackage_tool/gen_golden/gen_golden.py
@@ -96,7 +96,7 @@ if __name__ == '__main__':
np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == tf.float32:
input_values.append(
- np.random.random_sample(this_shape).astype(np.float32))
+ (10 * np.random.random_sample(this_shape) - 5).astype(np.float32))
elif this_dtype == tf.bool:
# generate random integer from [0, 2)
input_values.append(
@@ -142,7 +142,7 @@ if __name__ == '__main__':
np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == np.float32:
input_values.append(
- np.random.random_sample(this_shape).astype(np.float32))
+ (10 * np.random.random_sample(this_shape) - 5).astype(np.float32))
elif this_dtype == np.bool_:
# generate random integer from [0, 2)
input_values.append(
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
index 9374af737..5c7c35b13 100755
--- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
@@ -5,24 +5,33 @@ set -eu
progname=$(basename "${BASH_SOURCE[0]}")
outdir="."
name=""
-config=""
-config_src=""
+configs_src=()
+models_src=()
+configs_str=""
+models_str=""
+types_str=""
usage() {
- echo "Usage: $progname [options] modelfile"
+ echo "Usage: $progname [options]"
echo "Convert modelfile (tflite, circle or tvn) to nnpackage."
echo ""
echo "Options:"
echo " -h show this help"
echo " -o set nnpackage output directory (default=$outdir)"
- echo " -p set nnpackage output name (default=[modelfile name])"
- echo " -c provide configuration file"
+ echo " -p set nnpackage output name (default=[1st modelfile name])"
+ echo " -c provide configuration files"
+ echo " -m provide model files"
+ echo ""
+ echo " (Will be deprecated: if there is one remain parameter, that is model file)"
echo ""
echo "Examples:"
- echo " $progname add.tflite => create nnpackage 'add' in $outdir/"
- echo " $progname -o out add.tflite => create nnpackage 'add' in out/"
- echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
- echo " $progname -c add.cfg add.tflite => create nnpackage 'add' with add.cfg"
+ echo " $progname -m add.tflite => create nnpackage 'add' in $outdir/"
+ echo " $progname -o out -m add.tflite => create nnpackage 'add' in out/"
+ echo " $progname -o out -p addpkg -m add.tflite => create nnpackage 'addpkg' in out/"
+ echo " $progname -c add.cfg -m add.tflite => create nnpackage 'add' with add.cfg"
+ echo " $progname -o out -p addpkg -m a1.tflite a2.tflite => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/"
+ echo ""
+ echo "(Will be deprecated: if there is one remain parameter, that is model file)"
exit 1
}
@@ -31,58 +40,116 @@ if [ $# -eq 0 ]; then
exit 1
fi
-while getopts "ho:p:c:" OPTION; do
-case "${OPTION}" in
+while getopts "ho:p:c:m:" OPTION; do
+ case "${OPTION}" in
h) usage;;
o) outdir=$OPTARG;;
p) name=$OPTARG;;
- c) config_src=$OPTARG;;
+ c)
+ configs_src=($OPTARG)
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+ if [[ $OPTIND -eq $# ]] && [[ ${#models_src[@]} -eq 0 ]]; then
+ # Backward compatibility (will be deprecated)
+ # The last remain parameter is model if there is no option "-m"
+ models_src=($(eval "echo \${$OPTIND}"))
+ else
+ configs_src+=($(eval "echo \${$OPTIND}"))
+ fi
+ OPTIND=$((OPTIND + 1))
+ done
+ ;;
+ m)
+ models_src=($OPTARG)
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+ models_src+=($(eval "echo \${$OPTIND}"))
+ OPTIND=$((OPTIND + 1))
+ done
+ ;;
?) exit 1;;
-esac
+ esac
done
shift $((OPTIND-1))
-if [ $# -ne 1 ]; then
- >&2 echo "error: wrong argument (no argument or too many arguments)."
- >&2 echo "For help, type $progname -h"
- exit 1
+# Backward compatibility (will be deprecated)
+# The last remain parameter is model if there is no option "-m"
+if [ $# -eq 1 ] && [ ${#models_src[@]} -eq 0 ]; then
+ models_src=($1)
+ shift 1
fi
-modelfile=$(basename "$1")
-
-if [[ "$modelfile" != *.* ]]; then
- >&2 echo "error: modelfile does not have extension."
- >&2 echo "Please provide extension so that $progname can identify what type of model you use."
+if [ $# -ne 0 ]; then
+ >&2 echo "error: wrong argument (too many arguments)."
+ >&2 echo "For help, type $progname -h"
exit 1
fi
-if [ ! -e $1 ]; then
- >&2 echo "error: "$1" does not exist."
+if [[ ${#configs_src[@]} -ne 0 ]] && [[ ${#configs_src[@]} -ne ${#models_src[@]} ]]; then
+ >&2 echo "error: when config files are provided, the number of config files must equal the number of model files"
+ >&2 echo "Please provide config file for each model file, or don't provide config file."
exit 1
fi
+delim=""
+for modelpath in ${models_src[@]}
+do
+ modelfile=$(basename "$modelpath")
+
+ if [[ "$modelfile" != *.* ]]; then
+ >&2 echo "error: modelfile does not have extension."
+ >&2 echo "Please provide extension so that $progname can identify what type of model you use."
+ exit 1
+ fi
+
+ if [ ! -e $modelpath ]; then
+ >&2 echo "error: "$modelpath" does not exist."
+ exit 1
+ fi
+
+ models_str="$models_str$delim\"$modelfile\""
+ types_str="$types_str$delim\"${modelfile##*.}\""
+ delim=", "
+done
+
+delim=""
+for configpath in ${configs_src[@]}
+do
+ configfile=$(basename "$configpath")
+
+ if [ ! -e $configpath ]; then
+ >&2 echo "error: "$configpath" does not exist."
+ exit 1
+ fi
+
+ configs_str="$configs_str$delim\"$configfile\""
+ delim=", "
+done
+
if [ -z "$name" ]; then
- name=${modelfile%.*}
+ first_modelfile=$(basename "${models_src[0]}")
+ name=${first_modelfile%.*}
fi
-extension=${modelfile##*.}
echo "$progname: Generating nnpackage "$name" in "$outdir""
mkdir -p "$outdir"/"$name"/metadata
-if [ -s "$config_src" ]; then
- config=$(basename "$config_src")
- cp "$config_src" "$outdir/$name/metadata/$config"
-fi
-
cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
{
"major-version" : "1",
"minor-version" : "2",
"patch-version" : "0",
- "configs" : [ "$config" ],
- "models" : [ "$modelfile" ],
- "model-types" : [ "$extension" ]
+ "configs" : [ $configs_str ],
+ "models" : [ $models_str ],
+ "model-types" : [ $types_str ]
}
EOF
-cp "$1" "$outdir"/"$name"
+
+for modelpath in ${models_src[@]}
+do
+ cp "$modelpath" "$outdir"/"$name"
+done
+
+for configpath in ${configs_src[@]}
+do
+ cp "$configpath" "$outdir/$name/metadata"
+done